1*e133240eSMatthew G Knepley #define PETSCMAT_DLL 2*e133240eSMatthew G Knepley 3*e133240eSMatthew G Knepley /* 4*e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 5*e133240eSMatthew G Knepley Testing examples can be found in ~src/mat/examples/tests 6*e133240eSMatthew G Knepley */ 7*e133240eSMatthew G Knepley 8*e133240eSMatthew G Knepley #include "private/matimpl.h" /*I "petscmat.h" I*/ 9*e133240eSMatthew G Knepley EXTERN_C_BEGIN 10*e133240eSMatthew G Knepley #include <cuda.h> 11*e133240eSMatthew G Knepley #include <cuda_runtime.h> 12*e133240eSMatthew G Knepley #include <cufft.h> 13*e133240eSMatthew G Knepley EXTERN_C_END 14*e133240eSMatthew G Knepley 15*e133240eSMatthew G Knepley typedef struct { 16*e133240eSMatthew G Knepley PetscInt ndim; 17*e133240eSMatthew G Knepley PetscInt *dim; 18*e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 19*e133240eSMatthew G Knepley cufftComplex *devArray; 20*e133240eSMatthew G Knepley } Mat_CUFFT; 21*e133240eSMatthew G Knepley 22*e133240eSMatthew G Knepley #undef __FUNCT__ 23*e133240eSMatthew G Knepley #define __FUNCT__ "MatMult_SeqCUFFT" 24*e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 25*e133240eSMatthew G Knepley { 26*e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 27*e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 28*e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 29*e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 30*e133240eSMatthew G Knepley cufftResult result; 31*e133240eSMatthew G Knepley PetscErrorCode ierr; 32*e133240eSMatthew G Knepley 33*e133240eSMatthew G Knepley PetscFunctionBegin; 34*e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 35*e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 36*e133240eSMatthew G Knepley if (!cufft->p_forward) { 37*e133240eSMatthew G Knepley cufftResult result; 38*e133240eSMatthew G Knepley /* create a plan, then execute it */ 39*e133240eSMatthew G Knepley switch(ndim) { 40*e133240eSMatthew G Knepley case 1: 41*e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 42*e133240eSMatthew G Knepley break; 43*e133240eSMatthew G Knepley case 2: 44*e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 45*e133240eSMatthew G Knepley break; 46*e133240eSMatthew G Knepley case 3: 47*e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 48*e133240eSMatthew G Knepley break; 49*e133240eSMatthew G Knepley default: 50*e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 51*e133240eSMatthew G Knepley } 52*e133240eSMatthew G Knepley } 53*e133240eSMatthew G Knepley /* transfer to GPU memory */ 54*e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 55*e133240eSMatthew G Knepley /* execute transform */ 56*e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS); 57*e133240eSMatthew G Knepley /* transfer from GPU memory */ 58*e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 59*e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 60*e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 61*e133240eSMatthew G Knepley PetscFunctionReturn(0); 62*e133240eSMatthew G Knepley } 63*e133240eSMatthew G Knepley 64*e133240eSMatthew G Knepley #undef __FUNCT__ 65*e133240eSMatthew G Knepley #define __FUNCT__ "MatMultTranspose_SeqCUFFT" 66*e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 67*e133240eSMatthew G Knepley { 68*e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 69*e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 70*e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 71*e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 72*e133240eSMatthew G Knepley cufftResult result; 73*e133240eSMatthew G Knepley PetscErrorCode ierr; 74*e133240eSMatthew G Knepley 75*e133240eSMatthew G Knepley PetscFunctionBegin; 76*e133240eSMatthew G Knepley ierr = VecGetArray(x, &x_array);CHKERRQ(ierr); 77*e133240eSMatthew G Knepley ierr = VecGetArray(y, &y_array);CHKERRQ(ierr); 78*e133240eSMatthew G Knepley if (!cufft->p_backward) { 79*e133240eSMatthew G Knepley /* create a plan, then execute it */ 80*e133240eSMatthew G Knepley switch(ndim) { 81*e133240eSMatthew G Knepley case 1: 82*e133240eSMatthew G Knepley result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS); 83*e133240eSMatthew G Knepley break; 84*e133240eSMatthew G Knepley case 2: 85*e133240eSMatthew G Knepley result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 86*e133240eSMatthew G Knepley break; 87*e133240eSMatthew G Knepley case 3: 88*e133240eSMatthew G Knepley result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS); 89*e133240eSMatthew G Knepley break; 90*e133240eSMatthew G Knepley default: 91*e133240eSMatthew G Knepley SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim); 92*e133240eSMatthew G Knepley } 93*e133240eSMatthew G Knepley } 94*e133240eSMatthew G Knepley /* transfer to GPU memory */ 95*e133240eSMatthew G Knepley cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice); 96*e133240eSMatthew G Knepley /* execute transform */ 97*e133240eSMatthew G Knepley result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS); 98*e133240eSMatthew G Knepley /* transfer from GPU memory */ 99*e133240eSMatthew G Knepley cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost); 100*e133240eSMatthew G Knepley ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr); 101*e133240eSMatthew G Knepley ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr); 102*e133240eSMatthew G Knepley PetscFunctionReturn(0); 103*e133240eSMatthew G Knepley } 104*e133240eSMatthew G Knepley 105*e133240eSMatthew G Knepley #undef __FUNCT__ 106*e133240eSMatthew G Knepley #define __FUNCT__ "MatDestroy_SeqCUFFT" 107*e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 108*e133240eSMatthew G Knepley { 109*e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *) A->data; 110*e133240eSMatthew G Knepley cufftResult result; 111*e133240eSMatthew G Knepley PetscErrorCode ierr; 112*e133240eSMatthew G Knepley 113*e133240eSMatthew G Knepley PetscFunctionBegin; 114*e133240eSMatthew G Knepley ierr = PetscFree(cufft->dim);CHKERRQ(ierr); 115*e133240eSMatthew G Knepley if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);} 116*e133240eSMatthew G Knepley if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);} 117*e133240eSMatthew G Knepley cudaFree(cufft->devArray); 118*e133240eSMatthew G Knepley ierr = PetscFree(cufft);CHKERRQ(ierr); 119*e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject) A, PETSC_NULL);CHKERRQ(ierr); 120*e133240eSMatthew G Knepley PetscFunctionReturn(0); 121*e133240eSMatthew G Knepley } 122*e133240eSMatthew G Knepley 123*e133240eSMatthew G Knepley #undef __FUNCT__ 124*e133240eSMatthew G Knepley #define __FUNCT__ "MatCreateSeqCUFFT" 125*e133240eSMatthew G Knepley /*@ 126*e133240eSMatthew G Knepley MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT 127*e133240eSMatthew G Knepley 128*e133240eSMatthew G Knepley Collective on MPI_Comm 129*e133240eSMatthew G Knepley 130*e133240eSMatthew G Knepley Input Parameters: 131*e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF 132*e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 133*e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 134*e133240eSMatthew G Knepley 135*e133240eSMatthew G Knepley Output Parameter: 136*e133240eSMatthew G Knepley . A - the matrix 137*e133240eSMatthew G Knepley 138*e133240eSMatthew G Knepley Options Database Keys: 139*e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 140*e133240eSMatthew G Knepley 141*e133240eSMatthew G Knepley Level: intermediate 142*e133240eSMatthew G Knepley @*/ 143*e133240eSMatthew G Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat* A) 144*e133240eSMatthew G Knepley { 145*e133240eSMatthew G Knepley Mat_CUFFT *cufft; 146*e133240eSMatthew G Knepley PetscInt m, d; 147*e133240eSMatthew G Knepley PetscErrorCode ierr; 148*e133240eSMatthew G Knepley 149*e133240eSMatthew G Knepley PetscFunctionBegin; 150*e133240eSMatthew G Knepley if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim); 151*e133240eSMatthew G Knepley ierr = MatCreate(comm, A);CHKERRQ(ierr); 152*e133240eSMatthew G Knepley m = 1; 153*e133240eSMatthew G Knepley for(d = 0; d < ndim; ++d){ 154*e133240eSMatthew G Knepley if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]); 155*e133240eSMatthew G Knepley m *= dim[d]; 156*e133240eSMatthew G Knepley } 157*e133240eSMatthew G Knepley ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr); 158*e133240eSMatthew G Knepley ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr); 159*e133240eSMatthew G Knepley 160*e133240eSMatthew G Knepley ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr); 161*e133240eSMatthew G Knepley (*A)->data = (void*) cufft; 162*e133240eSMatthew G Knepley ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr); 163*e133240eSMatthew G Knepley ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr); 164*e133240eSMatthew G Knepley cufft->ndim = ndim; 165*e133240eSMatthew G Knepley cufft->p_forward = 0; 166*e133240eSMatthew G Knepley cufft->p_backward = 0; 167*e133240eSMatthew G Knepley cufft->dim[ndim] = m; 168*e133240eSMatthew G Knepley 169*e133240eSMatthew G Knepley /* GPU memory allocation */ 170*e133240eSMatthew G Knepley cudaMalloc((void **) &cufft->devArray, sizeof(cufftComplex)*m); 171*e133240eSMatthew G Knepley 172*e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 173*e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 174*e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 175*e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 176*e133240eSMatthew G Knepley 177*e133240eSMatthew G Knepley /* get runtime options */ 178*e133240eSMatthew G Knepley ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr); 179*e133240eSMatthew G Knepley ierr = PetscOptionsEnd();CHKERRQ(ierr); 180*e133240eSMatthew G Knepley PetscFunctionReturn(0); 181*e133240eSMatthew G Knepley } 182