1e133240eSMatthew G Knepley /* 2e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 3c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 4e133240eSMatthew G Knepley */ 5e133240eSMatthew G Knepley 60e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h> 7af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 8e133240eSMatthew G Knepley 9e133240eSMatthew G Knepley typedef struct { 10e133240eSMatthew G Knepley PetscInt ndim; 11e133240eSMatthew G Knepley PetscInt *dim; 12e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 13e133240eSMatthew G Knepley cufftComplex *devArray; 14e133240eSMatthew G Knepley } Mat_CUFFT; 15e133240eSMatthew G Knepley 1666976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) 17d71ae5a4SJacob Faibussowitsch { 18e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 19e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 20e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 21e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 22e133240eSMatthew G Knepley 23e133240eSMatthew G Knepley PetscFunctionBegin; 249566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array)); 259566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 26e133240eSMatthew G Knepley if (!cufft->p_forward) { 27e133240eSMatthew G Knepley /* create a plan, then execute it */ 28e133240eSMatthew G Knepley switch (ndim) { 29d71ae5a4SJacob Faibussowitsch case 1: 30d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); 31d71ae5a4SJacob Faibussowitsch break; 32d71ae5a4SJacob Faibussowitsch case 2: 33d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); 34d71ae5a4SJacob Faibussowitsch break; 35d71ae5a4SJacob Faibussowitsch case 3: 36d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); 37d71ae5a4SJacob Faibussowitsch break; 38d71ae5a4SJacob Faibussowitsch default: 39d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 40e133240eSMatthew G Knepley } 41e133240eSMatthew G Knepley } 42e133240eSMatthew G Knepley /* transfer to GPU memory */ 439566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 44e133240eSMatthew G Knepley /* execute transform */ 459566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD)); 46e133240eSMatthew G Knepley /* transfer from GPU memory */ 479566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 489566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 499566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array)); 503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 51e133240eSMatthew G Knepley } 52e133240eSMatthew G Knepley 5366976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) 54d71ae5a4SJacob Faibussowitsch { 55e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 56e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 57e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 58e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 59e133240eSMatthew G Knepley 60e133240eSMatthew G Knepley PetscFunctionBegin; 619566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array)); 629566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 63e133240eSMatthew G Knepley if (!cufft->p_backward) { 64e133240eSMatthew G Knepley /* create a plan, then execute it */ 65e133240eSMatthew G Knepley switch (ndim) { 66d71ae5a4SJacob Faibussowitsch case 1: 67d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); 68d71ae5a4SJacob Faibussowitsch break; 69d71ae5a4SJacob Faibussowitsch case 2: 70d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); 71d71ae5a4SJacob Faibussowitsch break; 72d71ae5a4SJacob Faibussowitsch case 3: 73d71ae5a4SJacob Faibussowitsch PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); 74d71ae5a4SJacob Faibussowitsch break; 75d71ae5a4SJacob Faibussowitsch default: 76d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 77e133240eSMatthew G Knepley } 78e133240eSMatthew G Knepley } 79e133240eSMatthew G Knepley /* transfer to GPU memory */ 809566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 81e133240eSMatthew G Knepley /* execute transform */ 829566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE)); 83e133240eSMatthew G Knepley /* transfer from GPU memory */ 849566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array)); 873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 88e133240eSMatthew G Knepley } 89e133240eSMatthew G Knepley 9066976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqCUFFT(Mat A) 91d71ae5a4SJacob Faibussowitsch { 92e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 93e133240eSMatthew G Knepley 94e133240eSMatthew G Knepley PetscFunctionBegin; 959566063dSJacob Faibussowitsch PetscCall(PetscFree(cufft->dim)); 969566063dSJacob Faibussowitsch if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward)); 979566063dSJacob Faibussowitsch if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward)); 989566063dSJacob Faibussowitsch PetscCallCUDA(cudaFree(cufft->devArray)); 999566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 1009566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0)); 1013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 102e133240eSMatthew G Knepley } 103e133240eSMatthew G Knepley 104e133240eSMatthew G Knepley /*@ 10511a5261eSBarry Smith MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT 106e133240eSMatthew G Knepley 107d083f849SBarry Smith Collective 108e133240eSMatthew G Knepley 109e133240eSMatthew G Knepley Input Parameters: 11011a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 111e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 1122ef1f0ffSBarry Smith - dim - array of size `ndim`, dim[i] contains the vector length in the i-dimension 113e133240eSMatthew G Knepley 114e133240eSMatthew G Knepley Output Parameter: 115e133240eSMatthew G Knepley . A - the matrix 116e133240eSMatthew G Knepley 1172ef1f0ffSBarry Smith Options Database Key: 1182ef1f0ffSBarry Smith . -mat_cufft_plannerflags - set CuFFT planner flags 119e133240eSMatthew G Knepley 120e133240eSMatthew G Knepley Level: intermediate 12111a5261eSBarry Smith 1221cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQCUFFT` 123e133240eSMatthew G Knepley @*/ 124d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) 125d71ae5a4SJacob Faibussowitsch { 126e133240eSMatthew G Knepley Mat_CUFFT *cufft; 1275f80ce2aSJacob Faibussowitsch PetscInt m = 1; 128e133240eSMatthew G Knepley 129e133240eSMatthew G Knepley PetscFunctionBegin; 1305f80ce2aSJacob Faibussowitsch PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim); 1314f572ea9SToby Isaac if (ndim) PetscAssertPointer(dim, 3); 1324f572ea9SToby Isaac PetscAssertPointer(A, 4); 1339566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 1345f80ce2aSJacob Faibussowitsch for (PetscInt d = 0; d < ndim; ++d) { 1355f80ce2aSJacob Faibussowitsch PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]); 136e133240eSMatthew G Knepley m *= dim[d]; 137e133240eSMatthew G Knepley } 1389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, m, m, m)); 1399566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT)); 140e133240eSMatthew G Knepley 1414dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&cufft)); 142e133240eSMatthew G Knepley (*A)->data = (void *)cufft; 1439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim + 1, &cufft->dim)); 1449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(cufft->dim, dim, ndim)); 1452205254eSKarl Rupp 146e133240eSMatthew G Knepley cufft->ndim = ndim; 147e133240eSMatthew G Knepley cufft->p_forward = 0; 148e133240eSMatthew G Knepley cufft->p_backward = 0; 149e133240eSMatthew G Knepley cufft->dim[ndim] = m; 150e133240eSMatthew G Knepley 151e133240eSMatthew G Knepley /* GPU memory allocation */ 1529566063dSJacob Faibussowitsch PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m)); 153e133240eSMatthew G Knepley 154e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 155e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 156e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 157e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 158e133240eSMatthew G Knepley 1595f80ce2aSJacob Faibussowitsch /* get runtime options ...what options????? */ 160*f4f49eeaSPierre Jolivet PetscOptionsBegin(comm, ((PetscObject)*A)->prefix, "CUFFT Options", "Mat"); 161d0609cedSBarry Smith PetscOptionsEnd(); 1623ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 163e133240eSMatthew G Knepley } 164