xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1e133240eSMatthew G Knepley 
2e133240eSMatthew G Knepley /*
3e133240eSMatthew G Knepley     Provides an interface to the CUFFT package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5e133240eSMatthew G Knepley */
6e133240eSMatthew G Knepley 
70e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h>
8af0996ceSBarry Smith #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
9e133240eSMatthew G Knepley 
10e133240eSMatthew G Knepley typedef struct {
11e133240eSMatthew G Knepley   PetscInt      ndim;
12e133240eSMatthew G Knepley   PetscInt     *dim;
13e133240eSMatthew G Knepley   cufftHandle   p_forward, p_backward;
14e133240eSMatthew G Knepley   cufftComplex *devArray;
15e133240eSMatthew G Knepley } Mat_CUFFT;
16e133240eSMatthew G Knepley 
179371c9d4SSatish Balay PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y) {
18e133240eSMatthew G Knepley   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
19e133240eSMatthew G Knepley   cufftComplex *devArray = cufft->devArray;
20e133240eSMatthew G Knepley   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
21e133240eSMatthew G Knepley   PetscScalar  *x_array, *y_array;
22e133240eSMatthew G Knepley 
23e133240eSMatthew G Knepley   PetscFunctionBegin;
249566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &x_array));
259566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
26e133240eSMatthew G Knepley   if (!cufft->p_forward) {
27e133240eSMatthew G Knepley     /* create a plan, then execute it */
28e133240eSMatthew G Knepley     switch (ndim) {
299371c9d4SSatish Balay     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1)); break;
309371c9d4SSatish Balay     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C)); break;
319371c9d4SSatish Balay     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
329371c9d4SSatish Balay     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
33e133240eSMatthew G Knepley     }
34e133240eSMatthew G Knepley   }
35e133240eSMatthew G Knepley   /* transfer to GPU memory */
369566063dSJacob Faibussowitsch   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
37e133240eSMatthew G Knepley   /* execute transform */
389566063dSJacob Faibussowitsch   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
39e133240eSMatthew G Knepley   /* transfer from GPU memory */
409566063dSJacob Faibussowitsch   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
419566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
429566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &x_array));
43e133240eSMatthew G Knepley   PetscFunctionReturn(0);
44e133240eSMatthew G Knepley }
45e133240eSMatthew G Knepley 
469371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y) {
47e133240eSMatthew G Knepley   Mat_CUFFT    *cufft    = (Mat_CUFFT *)A->data;
48e133240eSMatthew G Knepley   cufftComplex *devArray = cufft->devArray;
49e133240eSMatthew G Knepley   PetscInt      ndim = cufft->ndim, *dim = cufft->dim;
50e133240eSMatthew G Knepley   PetscScalar  *x_array, *y_array;
51e133240eSMatthew G Knepley 
52e133240eSMatthew G Knepley   PetscFunctionBegin;
539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &x_array));
549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
55e133240eSMatthew G Knepley   if (!cufft->p_backward) {
56e133240eSMatthew G Knepley     /* create a plan, then execute it */
57e133240eSMatthew G Knepley     switch (ndim) {
589371c9d4SSatish Balay     case 1: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1)); break;
599371c9d4SSatish Balay     case 2: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C)); break;
609371c9d4SSatish Balay     case 3: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C)); break;
619371c9d4SSatish Balay     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
62e133240eSMatthew G Knepley     }
63e133240eSMatthew G Knepley   }
64e133240eSMatthew G Knepley   /* transfer to GPU memory */
659566063dSJacob Faibussowitsch   PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
66e133240eSMatthew G Knepley   /* execute transform */
679566063dSJacob Faibussowitsch   PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
68e133240eSMatthew G Knepley   /* transfer from GPU memory */
699566063dSJacob Faibussowitsch   PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
719566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &x_array));
72e133240eSMatthew G Knepley   PetscFunctionReturn(0);
73e133240eSMatthew G Knepley }
74e133240eSMatthew G Knepley 
759371c9d4SSatish Balay PetscErrorCode MatDestroy_SeqCUFFT(Mat A) {
76e133240eSMatthew G Knepley   Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
77e133240eSMatthew G Knepley 
78e133240eSMatthew G Knepley   PetscFunctionBegin;
799566063dSJacob Faibussowitsch   PetscCall(PetscFree(cufft->dim));
809566063dSJacob Faibussowitsch   if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
819566063dSJacob Faibussowitsch   if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
829566063dSJacob Faibussowitsch   PetscCallCUDA(cudaFree(cufft->devArray));
839566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
849566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
85e133240eSMatthew G Knepley   PetscFunctionReturn(0);
86e133240eSMatthew G Knepley }
87e133240eSMatthew G Knepley 
88e133240eSMatthew G Knepley /*@
8911a5261eSBarry Smith   MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT
90e133240eSMatthew G Knepley 
91d083f849SBarry Smith   Collective
92e133240eSMatthew G Knepley 
93e133240eSMatthew G Knepley   Input Parameters:
9411a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
95e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform
96e133240eSMatthew G Knepley - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
97e133240eSMatthew G Knepley 
98e133240eSMatthew G Knepley   Output Parameter:
99e133240eSMatthew G Knepley . A - the matrix
100e133240eSMatthew G Knepley 
101e133240eSMatthew G Knepley   Options Database Keys:
102e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags
103e133240eSMatthew G Knepley 
104e133240eSMatthew G Knepley   Level: intermediate
10511a5261eSBarry Smith 
10611a5261eSBarry Smith .seealso: `MATSEQCUFFT`
107e133240eSMatthew G Knepley @*/
1089371c9d4SSatish Balay PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A) {
109e133240eSMatthew G Knepley   Mat_CUFFT *cufft;
1105f80ce2aSJacob Faibussowitsch   PetscInt   m = 1;
111e133240eSMatthew G Knepley 
112e133240eSMatthew G Knepley   PetscFunctionBegin;
1135f80ce2aSJacob Faibussowitsch   PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
1145f80ce2aSJacob Faibussowitsch   if (ndim) PetscValidIntPointer(dim, 3);
1155f80ce2aSJacob Faibussowitsch   PetscValidPointer(A, 4);
1169566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
1175f80ce2aSJacob Faibussowitsch   for (PetscInt d = 0; d < ndim; ++d) {
1185f80ce2aSJacob Faibussowitsch     PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
119e133240eSMatthew G Knepley     m *= dim[d];
120e133240eSMatthew G Knepley   }
1219566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, m, m, m));
1229566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
123e133240eSMatthew G Knepley 
124*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&cufft));
125e133240eSMatthew G Knepley   (*A)->data = (void *)cufft;
1269566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
1279566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(cufft->dim, dim, ndim));
1282205254eSKarl Rupp 
129e133240eSMatthew G Knepley   cufft->ndim       = ndim;
130e133240eSMatthew G Knepley   cufft->p_forward  = 0;
131e133240eSMatthew G Knepley   cufft->p_backward = 0;
132e133240eSMatthew G Knepley   cufft->dim[ndim]  = m;
133e133240eSMatthew G Knepley 
134e133240eSMatthew G Knepley   /* GPU memory allocation */
1359566063dSJacob Faibussowitsch   PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));
136e133240eSMatthew G Knepley 
137e133240eSMatthew G Knepley   (*A)->ops->mult          = MatMult_SeqCUFFT;
138e133240eSMatthew G Knepley   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
139e133240eSMatthew G Knepley   (*A)->assembled          = PETSC_TRUE;
140e133240eSMatthew G Knepley   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
141e133240eSMatthew G Knepley 
1425f80ce2aSJacob Faibussowitsch   /* get runtime options ...what options????? */
143d0609cedSBarry Smith   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
144d0609cedSBarry Smith   PetscOptionsEnd();
145e133240eSMatthew G Knepley   PetscFunctionReturn(0);
146e133240eSMatthew G Knepley }
147