xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1e133240eSMatthew G Knepley 
2e133240eSMatthew G Knepley /*
3e133240eSMatthew G Knepley     Provides an interface to the CUFFT package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5e133240eSMatthew G Knepley */
6e133240eSMatthew G Knepley 
7030f984aSJacob Faibussowitsch #include <petscdevice.h>
8af0996ceSBarry Smith #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
9e133240eSMatthew G Knepley 
10e133240eSMatthew G Knepley typedef struct {
11e133240eSMatthew G Knepley   PetscInt     ndim;
12e133240eSMatthew G Knepley   PetscInt     *dim;
13e133240eSMatthew G Knepley   cufftHandle  p_forward, p_backward;
14e133240eSMatthew G Knepley   cufftComplex *devArray;
15e133240eSMatthew G Knepley } Mat_CUFFT;
16e133240eSMatthew G Knepley 
17e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18e133240eSMatthew G Knepley {
19e133240eSMatthew G Knepley   Mat_CUFFT    *cufft    = (Mat_CUFFT*) A->data;
20e133240eSMatthew G Knepley   cufftComplex *devArray = cufft->devArray;
21e133240eSMatthew G Knepley   PetscInt      ndim     = cufft->ndim, *dim = cufft->dim;
22e133240eSMatthew G Knepley   PetscScalar  *x_array, *y_array;
23e133240eSMatthew G Knepley 
24e133240eSMatthew G Knepley   PetscFunctionBegin;
25*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(x, &x_array));
26*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(y, &y_array));
27e133240eSMatthew G Knepley   if (!cufft->p_forward) {
28e133240eSMatthew G Knepley     /* create a plan, then execute it */
29e133240eSMatthew G Knepley     switch (ndim) {
30e133240eSMatthew G Knepley     case 1:
31*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1));
32e133240eSMatthew G Knepley       break;
33e133240eSMatthew G Knepley     case 2:
34*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C));
35e133240eSMatthew G Knepley       break;
36e133240eSMatthew G Knepley     case 3:
37*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C));
38e133240eSMatthew G Knepley       break;
39e133240eSMatthew G Knepley     default:
40*5f80ce2aSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
41e133240eSMatthew G Knepley     }
42e133240eSMatthew G Knepley   }
43e133240eSMatthew G Knepley   /* transfer to GPU memory */
44*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice));
45e133240eSMatthew G Knepley   /* execute transform */
46*5f80ce2aSJacob Faibussowitsch   CHKERRCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
47e133240eSMatthew G Knepley   /* transfer from GPU memory */
48*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost));
49*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(y, &y_array));
50*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(x, &x_array));
51e133240eSMatthew G Knepley   PetscFunctionReturn(0);
52e133240eSMatthew G Knepley }
53e133240eSMatthew G Knepley 
54e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
55e133240eSMatthew G Knepley {
56e133240eSMatthew G Knepley   Mat_CUFFT    *cufft    = (Mat_CUFFT*) A->data;
57e133240eSMatthew G Knepley   cufftComplex *devArray = cufft->devArray;
58e133240eSMatthew G Knepley   PetscInt      ndim     = cufft->ndim, *dim = cufft->dim;
59e133240eSMatthew G Knepley   PetscScalar  *x_array, *y_array;
60e133240eSMatthew G Knepley 
61e133240eSMatthew G Knepley   PetscFunctionBegin;
62*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(x, &x_array));
63*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecGetArray(y, &y_array));
64e133240eSMatthew G Knepley   if (!cufft->p_backward) {
65e133240eSMatthew G Knepley     /* create a plan, then execute it */
66e133240eSMatthew G Knepley     switch (ndim) {
67e133240eSMatthew G Knepley     case 1:
68*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1));
69e133240eSMatthew G Knepley       break;
70e133240eSMatthew G Knepley     case 2:
71*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C));
72e133240eSMatthew G Knepley       break;
73e133240eSMatthew G Knepley     case 3:
74*5f80ce2aSJacob Faibussowitsch       CHKERRCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C));
75e133240eSMatthew G Knepley       break;
76e133240eSMatthew G Knepley     default:
77*5f80ce2aSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
78e133240eSMatthew G Knepley     }
79e133240eSMatthew G Knepley   }
80e133240eSMatthew G Knepley   /* transfer to GPU memory */
81*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice));
82e133240eSMatthew G Knepley   /* execute transform */
83*5f80ce2aSJacob Faibussowitsch   CHKERRCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
84e133240eSMatthew G Knepley   /* transfer from GPU memory */
85*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost));
86*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(y, &y_array));
87*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecRestoreArray(x, &x_array));
88e133240eSMatthew G Knepley   PetscFunctionReturn(0);
89e133240eSMatthew G Knepley }
90e133240eSMatthew G Knepley 
91e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
92e133240eSMatthew G Knepley {
93e133240eSMatthew G Knepley   Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
94e133240eSMatthew G Knepley 
95e133240eSMatthew G Knepley   PetscFunctionBegin;
96*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(cufft->dim));
97*5f80ce2aSJacob Faibussowitsch   if (cufft->p_forward)  CHKERRCUFFT(cufftDestroy(cufft->p_forward));
98*5f80ce2aSJacob Faibussowitsch   if (cufft->p_backward) CHKERRCUFFT(cufftDestroy(cufft->p_backward));
99*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaFree(cufft->devArray));
100*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(A->data));
101*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,0));
102e133240eSMatthew G Knepley   PetscFunctionReturn(0);
103e133240eSMatthew G Knepley }
104e133240eSMatthew G Knepley 
105e133240eSMatthew G Knepley /*@
106e133240eSMatthew G Knepley   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
107e133240eSMatthew G Knepley 
108d083f849SBarry Smith   Collective
109e133240eSMatthew G Knepley 
110e133240eSMatthew G Knepley   Input Parameters:
111e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF
112e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform
113e133240eSMatthew G Knepley - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
114e133240eSMatthew G Knepley 
115e133240eSMatthew G Knepley   Output Parameter:
116e133240eSMatthew G Knepley . A - the matrix
117e133240eSMatthew G Knepley 
118e133240eSMatthew G Knepley   Options Database Keys:
119e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags
120e133240eSMatthew G Knepley 
121e133240eSMatthew G Knepley   Level: intermediate
122e133240eSMatthew G Knepley @*/
1237087cfbeSBarry Smith PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
124e133240eSMatthew G Knepley {
125e133240eSMatthew G Knepley   Mat_CUFFT *cufft;
126*5f80ce2aSJacob Faibussowitsch   PetscInt   m = 1;
127e133240eSMatthew G Knepley 
128e133240eSMatthew G Knepley   PetscFunctionBegin;
129*5f80ce2aSJacob Faibussowitsch   PetscCheck(ndim >= 0,PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
130*5f80ce2aSJacob Faibussowitsch   if (ndim) PetscValidIntPointer(dim,3);
131*5f80ce2aSJacob Faibussowitsch   PetscValidPointer(A,4);
132*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatCreate(comm, A));
133*5f80ce2aSJacob Faibussowitsch   for (PetscInt d = 0; d < ndim; ++d) {
134*5f80ce2aSJacob Faibussowitsch     PetscCheck(dim[d] >= 0,PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
135e133240eSMatthew G Knepley     m *= dim[d];
136e133240eSMatthew G Knepley   }
137*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetSizes(*A, m, m, m, m));
138*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
139e133240eSMatthew G Knepley 
140*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscNewLog(*A,&cufft));
141e133240eSMatthew G Knepley   (*A)->data = (void*) cufft;
142*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(ndim+1, &cufft->dim));
143*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscArraycpy(cufft->dim, dim, ndim));
1442205254eSKarl Rupp 
145e133240eSMatthew G Knepley   cufft->ndim       = ndim;
146e133240eSMatthew G Knepley   cufft->p_forward  = 0;
147e133240eSMatthew G Knepley   cufft->p_backward = 0;
148e133240eSMatthew G Knepley   cufft->dim[ndim]  = m;
149e133240eSMatthew G Knepley 
150e133240eSMatthew G Knepley   /* GPU memory allocation */
151*5f80ce2aSJacob Faibussowitsch   CHKERRCUDA(cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m));
152e133240eSMatthew G Knepley 
153e133240eSMatthew G Knepley   (*A)->ops->mult          = MatMult_SeqCUFFT;
154e133240eSMatthew G Knepley   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
155e133240eSMatthew G Knepley   (*A)->assembled          = PETSC_TRUE;
156e133240eSMatthew G Knepley   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
157e133240eSMatthew G Knepley 
158*5f80ce2aSJacob Faibussowitsch   /* get runtime options ...what options????? */
159*5f80ce2aSJacob Faibussowitsch   {
160*5f80ce2aSJacob Faibussowitsch     PetscErrorCode ierr;
161*5f80ce2aSJacob Faibussowitsch 
162e133240eSMatthew G Knepley     ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
163e133240eSMatthew G Knepley     ierr = PetscOptionsEnd();CHKERRQ(ierr);
164*5f80ce2aSJacob Faibussowitsch   }
165e133240eSMatthew G Knepley   PetscFunctionReturn(0);
166e133240eSMatthew G Knepley }
167