1e133240eSMatthew G Knepley 2e133240eSMatthew G Knepley /* 3e133240eSMatthew G Knepley Provides an interface to the CUFFT package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5e133240eSMatthew G Knepley */ 6e133240eSMatthew G Knepley 70e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h> 8af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 9e133240eSMatthew G Knepley 10e133240eSMatthew G Knepley typedef struct { 11e133240eSMatthew G Knepley PetscInt ndim; 12e133240eSMatthew G Knepley PetscInt *dim; 13e133240eSMatthew G Knepley cufftHandle p_forward, p_backward; 14e133240eSMatthew G Knepley cufftComplex *devArray; 15e133240eSMatthew G Knepley } Mat_CUFFT; 16e133240eSMatthew G Knepley 179371c9d4SSatish Balay PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) { 18e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 19e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 20e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 21e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 22e133240eSMatthew G Knepley 23e133240eSMatthew G Knepley PetscFunctionBegin; 249566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array)); 259566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 26e133240eSMatthew G Knepley if (!cufft->p_forward) { 27e133240eSMatthew G Knepley /* create a plan, then execute it */ 28e133240eSMatthew G Knepley switch (ndim) { 299371c9d4SSatish Balay case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); break; 309371c9d4SSatish Balay case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); break; 319371c9d4SSatish Balay case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); break; 329371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 33e133240eSMatthew G Knepley } 34e133240eSMatthew G Knepley } 35e133240eSMatthew G Knepley /* transfer to GPU memory */ 369566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 37e133240eSMatthew G Knepley /* execute transform */ 389566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD)); 39e133240eSMatthew G Knepley /* transfer from GPU memory */ 409566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 419566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 429566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array)); 43e133240eSMatthew G Knepley PetscFunctionReturn(0); 44e133240eSMatthew G Knepley } 45e133240eSMatthew G Knepley 469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) { 47e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 48e133240eSMatthew G Knepley cufftComplex *devArray = cufft->devArray; 49e133240eSMatthew G Knepley PetscInt ndim = cufft->ndim, *dim = cufft->dim; 50e133240eSMatthew G Knepley PetscScalar *x_array, *y_array; 51e133240eSMatthew G Knepley 52e133240eSMatthew G Knepley PetscFunctionBegin; 539566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &x_array)); 549566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 55e133240eSMatthew G Knepley if (!cufft->p_backward) { 56e133240eSMatthew G Knepley /* create a plan, then execute it */ 57e133240eSMatthew G Knepley switch (ndim) { 589371c9d4SSatish Balay case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); break; 599371c9d4SSatish Balay case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); break; 609371c9d4SSatish Balay case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); break; 619371c9d4SSatish Balay default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim); 62e133240eSMatthew G Knepley } 63e133240eSMatthew G Knepley } 64e133240eSMatthew G Knepley /* transfer to GPU memory */ 659566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice)); 66e133240eSMatthew G Knepley /* execute transform */ 679566063dSJacob Faibussowitsch PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE)); 68e133240eSMatthew G Knepley /* transfer from GPU memory */ 699566063dSJacob Faibussowitsch PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost)); 709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &x_array)); 72e133240eSMatthew G Knepley PetscFunctionReturn(0); 73e133240eSMatthew G Knepley } 74e133240eSMatthew G Knepley 759371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqCUFFT(Mat A) { 76e133240eSMatthew G Knepley Mat_CUFFT *cufft = (Mat_CUFFT *)A->data; 77e133240eSMatthew G Knepley 78e133240eSMatthew G Knepley PetscFunctionBegin; 799566063dSJacob Faibussowitsch PetscCall(PetscFree(cufft->dim)); 809566063dSJacob Faibussowitsch if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward)); 819566063dSJacob Faibussowitsch if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward)); 829566063dSJacob Faibussowitsch PetscCallCUDA(cudaFree(cufft->devArray)); 839566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data)); 849566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0)); 85e133240eSMatthew G Knepley PetscFunctionReturn(0); 86e133240eSMatthew G Knepley } 87e133240eSMatthew G Knepley 88e133240eSMatthew G Knepley /*@ 8911a5261eSBarry Smith MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT 90e133240eSMatthew G Knepley 91d083f849SBarry Smith Collective 92e133240eSMatthew G Knepley 93e133240eSMatthew G Knepley Input Parameters: 9411a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF` 95e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform 96e133240eSMatthew G Knepley - dim - array of size ndim, dim[i] contains the vector length in the i-dimension 97e133240eSMatthew G Knepley 98e133240eSMatthew G Knepley Output Parameter: 99e133240eSMatthew G Knepley . A - the matrix 100e133240eSMatthew G Knepley 101e133240eSMatthew G Knepley Options Database Keys: 102e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags 103e133240eSMatthew G Knepley 104e133240eSMatthew G Knepley Level: intermediate 10511a5261eSBarry Smith 10611a5261eSBarry Smith .seealso: `MATSEQCUFFT` 107e133240eSMatthew G Knepley @*/ 1089371c9d4SSatish Balay PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) { 109e133240eSMatthew G Knepley Mat_CUFFT *cufft; 1105f80ce2aSJacob Faibussowitsch PetscInt m = 1; 111e133240eSMatthew G Knepley 112e133240eSMatthew G Knepley PetscFunctionBegin; 1135f80ce2aSJacob Faibussowitsch PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim); 1145f80ce2aSJacob Faibussowitsch if (ndim) PetscValidIntPointer(dim, 3); 1155f80ce2aSJacob Faibussowitsch PetscValidPointer(A, 4); 1169566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 1175f80ce2aSJacob Faibussowitsch for (PetscInt d = 0; d < ndim; ++d) { 1185f80ce2aSJacob Faibussowitsch PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]); 119e133240eSMatthew G Knepley m *= dim[d]; 120e133240eSMatthew G Knepley } 1219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, m, m, m)); 1229566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT)); 123e133240eSMatthew G Knepley 124*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&cufft)); 125e133240eSMatthew G Knepley (*A)->data = (void *)cufft; 1269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim + 1, &cufft->dim)); 1279566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(cufft->dim, dim, ndim)); 1282205254eSKarl Rupp 129e133240eSMatthew G Knepley cufft->ndim = ndim; 130e133240eSMatthew G Knepley cufft->p_forward = 0; 131e133240eSMatthew G Knepley cufft->p_backward = 0; 132e133240eSMatthew G Knepley cufft->dim[ndim] = m; 133e133240eSMatthew G Knepley 134e133240eSMatthew G Knepley /* GPU memory allocation */ 1359566063dSJacob Faibussowitsch PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m)); 136e133240eSMatthew G Knepley 137e133240eSMatthew G Knepley (*A)->ops->mult = MatMult_SeqCUFFT; 138e133240eSMatthew G Knepley (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT; 139e133240eSMatthew G Knepley (*A)->assembled = PETSC_TRUE; 140e133240eSMatthew G Knepley (*A)->ops->destroy = MatDestroy_SeqCUFFT; 141e133240eSMatthew G Knepley 1425f80ce2aSJacob Faibussowitsch /* get runtime options ...what options????? */ 143d0609cedSBarry Smith PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat"); 144d0609cedSBarry Smith PetscOptionsEnd(); 145e133240eSMatthew G Knepley PetscFunctionReturn(0); 146e133240eSMatthew G Knepley } 147