1e133240eSMatthew G Knepley #define PETSCMAT_DLL 2e133240eSMatthew G Knepley 3e133240eSMatthew G Knepley /* 4e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 5e133240eSMatthew G Knepley Testing examples can be found in ~src/mat/examples/tests 6e133240eSMatthew G Knepley */ 7e133240eSMatthew G Knepley 8e133240eSMatthew G Knepley #include "private/matimpl.h" /*I "petscmat.h" I*/ 9e133240eSMatthew G Knepley EXTERN_C_BEGIN 10e133240eSMatthew G Knepley #include <cuda.h> 11e133240eSMatthew G Knepley #include <cuda_runtime.h> 12e133240eSMatthew G Knepley #include <cufft.h> 13e133240eSMatthew G Knepley EXTERN_C_END 14e133240eSMatthew G Knepley 15e133240eSMatthew G Knepley typedef struct { 16e133240eSMatthew G Knepley PetscInt ndim; 17e133240eSMatthew G Knepley PetscInt *dim; 18e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 19e133240eSMatthew G Knepley cufftComplex *devArray; 20e133240eSMatthew G Knepley } Mat_CUFFT; 21e133240eSMatthew G Knepley 22e133240eSMatthew G Knepley #undef __FUNCT__ 23e133240eSMatthew G Knepley #define __FUNCT__ "MatMult_SeqCUFFT" 24e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 25e133240eSMatthew G Knepley { 26e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 27e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 28e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 29e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 30e133240eSMatthew G Knepley cufftResult result; 31e133240eSMatthew G Knepley PetscErrorCode ierr; 32e133240eSMatthew G Knepley 33e133240eSMatthew G Knepley PetscFunctionBegin; 34e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 35e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 36e133240eSMatthew G Knepley if (!cufft->p_forward) { 37e133240eSMatthew G Knepley cufftResult result; 38e133240eSMatthew G Knepley /* create a plan, then execute it */ 39e133240eSMatthew G Knepley switch(ndim) { 40e133240eSMatthew G Knepley case 1: 41e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 42e133240eSMatthew G Knepley break; 43e133240eSMatthew G Knepley case 2: 44e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 45e133240eSMatthew G Knepley break; 46e133240eSMatthew G Knepley case 3: 47e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 48e133240eSMatthew G Knepley break; 49e133240eSMatthew G Knepley default: 50e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 51e133240eSMatthew G Knepley } 52e133240eSMatthew G Knepley } 53e133240eSMatthew G Knepley /* transfer to GPU memory */ 54e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 55e133240eSMatthew G Knepley /* execute transform */ 56e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 57e133240eSMatthew G Knepley /* transfer from GPU memory */ 58e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 59e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 60e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 61e133240eSMatthew G Knepley PetscFunctionReturn(0); 62e133240eSMatthew G Knepley } 63e133240eSMatthew G Knepley 64e133240eSMatthew G Knepley #undef __FUNCT__ 65e133240eSMatthew G Knepley #define __FUNCT__ "MatMultTranspose_SeqCUFFT" 66e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 67e133240eSMatthew G Knepley { 68e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 69e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 70e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 71e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 72e133240eSMatthew G Knepley cufftResult result; 73e133240eSMatthew G Knepley PetscErrorCode ierr; 74e133240eSMatthew G Knepley 75e133240eSMatthew G Knepley PetscFunctionBegin; 76e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 77e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 78e133240eSMatthew G Knepley if (!cufft->p_backward) { 79e133240eSMatthew G Knepley /* create a plan, then execute it */ 80e133240eSMatthew G Knepley switch(ndim) { 81e133240eSMatthew G Knepley case 1: 82e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 83e133240eSMatthew G Knepley break; 84e133240eSMatthew G Knepley case 2: 85e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 86e133240eSMatthew G Knepley break; 87e133240eSMatthew G Knepley case 3: 88e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 89e133240eSMatthew G Knepley break; 90e133240eSMatthew G Knepley default: 91e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 92e133240eSMatthew G Knepley } 93e133240eSMatthew G Knepley } 94e133240eSMatthew G Knepley /* transfer to GPU memory */ 95e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 96e133240eSMatthew G Knepley /* execute transform */ 97e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 98e133240eSMatthew G Knepley /* transfer from GPU memory */ 99e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 100e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 101e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 102e133240eSMatthew G Knepley PetscFunctionReturn(0); 103e133240eSMatthew G Knepley } 104e133240eSMatthew G Knepley 105e133240eSMatthew G Knepley #undef __FUNCT__ 106e133240eSMatthew G Knepley #define __FUNCT__ "MatDestroy_SeqCUFFT" 107e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 108e133240eSMatthew G Knepley { 109e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 110e133240eSMatthew G Knepley cufftResult result; 111e133240eSMatthew G Knepley PetscErrorCode ierr; 112e133240eSMatthew G Knepley 113e133240eSMatthew G Knepley PetscFunctionBegin; 114e133240eSMatthew G Knepley ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 115e133240eSMatthew G Knepley if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 116e133240eSMatthew G Knepley if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 117e133240eSMatthew G Knepley cudaFree(cufft->devArray); 118e133240eSMatthew G Knepley ierr = PetscFree(cufft);CHKERRQ(ierr); 119e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject) A, PETSC_NULL);CHKERRQ(ierr); 120e133240eSMatthew G Knepley PetscFunctionReturn(0); 121e133240eSMatthew G Knepley } 122e133240eSMatthew G Knepley 123e133240eSMatthew G Knepley #undef __FUNCT__ 124e133240eSMatthew G Knepley #define __FUNCT__ "MatCreateSeqCUFFT" 125e133240eSMatthew G Knepley /*@ 126e133240eSMatthew G Knepley MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 127e133240eSMatthew G Knepley 128e133240eSMatthew G Knepley Collective on MPI_Comm 129e133240eSMatthew G Knepley 130e133240eSMatthew G Knepley Input Parameters: 131e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF 132e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 133e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 134e133240eSMatthew G Knepley 135e133240eSMatthew G Knepley Output Parameter: 136e133240eSMatthew G Knepley . A - the matrix 137e133240eSMatthew G Knepley 138e133240eSMatthew G Knepley Options Database Keys: 139e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 140e133240eSMatthew G Knepley 141e133240eSMatthew G Knepley Level: intermediate 142e133240eSMatthew G Knepley @*/ 143*7087cfbeSBarry Smith PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat* A) 144e133240eSMatthew G Knepley { 145e133240eSMatthew G Knepley Mat_CUFFT *cufft; 146e133240eSMatthew G Knepley PetscInt m, d; 147e133240eSMatthew G Knepley PetscErrorCode ierr; 148e133240eSMatthew G Knepley 149e133240eSMatthew G Knepley PetscFunctionBegin; 150e133240eSMatthew G Knepley if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 151e133240eSMatthew G Knepley ierr = MatCreate(comm, A);CHKERRQ(ierr); 152e133240eSMatthew G Knepley m = 1; 153e133240eSMatthew G Knepley for(d = 0; d < ndim; ++d){ 154e133240eSMatthew G Knepley if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 155e133240eSMatthew G Knepley m *= dim[d]; 156e133240eSMatthew G Knepley } 157e133240eSMatthew G Knepley ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 158e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 159e133240eSMatthew G Knepley 160e133240eSMatthew G Knepley ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr); 161e133240eSMatthew G Knepley (*A)->data = (void*) cufft; 162e133240eSMatthew G Knepley ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr); 163e133240eSMatthew G Knepley ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr); 164e133240eSMatthew G Knepley cufft->ndim = ndim; 165e133240eSMatthew G Knepley cufft->p_forward = 0; 166e133240eSMatthew G Knepley cufft->p_backward = 0; 167e133240eSMatthew G Knepley cufft->dim[ndim] = m; 168e133240eSMatthew G Knepley 169e133240eSMatthew G Knepley /* GPU memory allocation */ 170e133240eSMatthew G Knepley cudaMalloc((void **) &cufft->devArray, sizeof(cufftComplex)*m); 171e133240eSMatthew G Knepley 172e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 173e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 174e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 175e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 176e133240eSMatthew G Knepley 177e133240eSMatthew G Knepley /* get runtime options */ 178e133240eSMatthew G Knepley ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 179e133240eSMatthew G Knepley ierr = PetscOptionsEnd();CHKERRQ(ierr); 180e133240eSMatthew G Knepley PetscFunctionReturn(0); 181e133240eSMatthew G Knepley } 182