xref: /petsc/src/mat/impls/cufft/cufft.cu (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1e133240eSMatthew G Knepley 
2e133240eSMatthew G Knepley /*
3e133240eSMatthew G Knepley     Provides an interface to the CUFFT package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5e133240eSMatthew G Knepley */
6e133240eSMatthew G Knepley 
7*030f984aSJacob Faibussowitsch #include <petscdevice.h>
8af0996ceSBarry Smith #include <petsc/private/matimpl.h>          /*I "petscmat.h" I*/
9e133240eSMatthew G Knepley 
10e133240eSMatthew G Knepley typedef struct {
11e133240eSMatthew G Knepley   PetscInt     ndim;
12e133240eSMatthew G Knepley   PetscInt     *dim;
13e133240eSMatthew G Knepley   cufftHandle  p_forward, p_backward;
14e133240eSMatthew G Knepley   cufftComplex *devArray;
15e133240eSMatthew G Knepley } Mat_CUFFT;
16e133240eSMatthew G Knepley 
17e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18e133240eSMatthew G Knepley {
19e133240eSMatthew G Knepley   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
20e133240eSMatthew G Knepley   cufftComplex   *devArray = cufft->devArray;
21e133240eSMatthew G Knepley   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
22e133240eSMatthew G Knepley   PetscScalar    *x_array, *y_array;
23e133240eSMatthew G Knepley   cufftResult    result;
24*030f984aSJacob Faibussowitsch   cudaError_t    cerr;
25e133240eSMatthew G Knepley   PetscErrorCode ierr;
26e133240eSMatthew G Knepley 
27e133240eSMatthew G Knepley   PetscFunctionBegin;
28e133240eSMatthew G Knepley   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
29e133240eSMatthew G Knepley   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
30e133240eSMatthew G Knepley   if (!cufft->p_forward) {
31e133240eSMatthew G Knepley     cufftResult result;
32e133240eSMatthew G Knepley     /* create a plan, then execute it */
33e133240eSMatthew G Knepley     switch (ndim) {
34e133240eSMatthew G Knepley     case 1:
35*030f984aSJacob Faibussowitsch       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
36e133240eSMatthew G Knepley       break;
37e133240eSMatthew G Knepley     case 2:
38*030f984aSJacob Faibussowitsch       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
39e133240eSMatthew G Knepley       break;
40e133240eSMatthew G Knepley     case 3:
41*030f984aSJacob Faibussowitsch       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
42e133240eSMatthew G Knepley       break;
43e133240eSMatthew G Knepley     default:
44e133240eSMatthew G Knepley       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
45e133240eSMatthew G Knepley     }
46e133240eSMatthew G Knepley   }
47e133240eSMatthew G Knepley   /* transfer to GPU memory */
48*030f984aSJacob Faibussowitsch   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
49e133240eSMatthew G Knepley   /* execute transform */
50*030f984aSJacob Faibussowitsch   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result);
51e133240eSMatthew G Knepley   /* transfer from GPU memory */
52*030f984aSJacob Faibussowitsch   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
53e133240eSMatthew G Knepley   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
54e133240eSMatthew G Knepley   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
55e133240eSMatthew G Knepley   PetscFunctionReturn(0);
56e133240eSMatthew G Knepley }
57e133240eSMatthew G Knepley 
58e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
59e133240eSMatthew G Knepley {
60e133240eSMatthew G Knepley   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
61e133240eSMatthew G Knepley   cufftComplex   *devArray = cufft->devArray;
62e133240eSMatthew G Knepley   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
63e133240eSMatthew G Knepley   PetscScalar    *x_array, *y_array;
64e133240eSMatthew G Knepley   cufftResult    result;
65*030f984aSJacob Faibussowitsch   cudaError_t    cerr;
66e133240eSMatthew G Knepley   PetscErrorCode ierr;
67e133240eSMatthew G Knepley 
68e133240eSMatthew G Knepley   PetscFunctionBegin;
69e133240eSMatthew G Knepley   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
70e133240eSMatthew G Knepley   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
71e133240eSMatthew G Knepley   if (!cufft->p_backward) {
72e133240eSMatthew G Knepley     /* create a plan, then execute it */
73e133240eSMatthew G Knepley     switch (ndim) {
74e133240eSMatthew G Knepley     case 1:
75*030f984aSJacob Faibussowitsch       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
76e133240eSMatthew G Knepley       break;
77e133240eSMatthew G Knepley     case 2:
78*030f984aSJacob Faibussowitsch       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
79e133240eSMatthew G Knepley       break;
80e133240eSMatthew G Knepley     case 3:
81*030f984aSJacob Faibussowitsch       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
82e133240eSMatthew G Knepley       break;
83e133240eSMatthew G Knepley     default:
84e133240eSMatthew G Knepley       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
85e133240eSMatthew G Knepley     }
86e133240eSMatthew G Knepley   }
87e133240eSMatthew G Knepley   /* transfer to GPU memory */
88*030f984aSJacob Faibussowitsch   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
89e133240eSMatthew G Knepley   /* execute transform */
90*030f984aSJacob Faibussowitsch   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result);
91e133240eSMatthew G Knepley   /* transfer from GPU memory */
92*030f984aSJacob Faibussowitsch   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
93e133240eSMatthew G Knepley   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
94e133240eSMatthew G Knepley   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
95e133240eSMatthew G Knepley   PetscFunctionReturn(0);
96e133240eSMatthew G Knepley }
97e133240eSMatthew G Knepley 
98e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
99e133240eSMatthew G Knepley {
100e133240eSMatthew G Knepley   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
101e133240eSMatthew G Knepley   cufftResult    result;
102*030f984aSJacob Faibussowitsch   cudaError_t    cerr;
103e133240eSMatthew G Knepley   PetscErrorCode ierr;
104e133240eSMatthew G Knepley 
105e133240eSMatthew G Knepley   PetscFunctionBegin;
106e133240eSMatthew G Knepley   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
107*030f984aSJacob Faibussowitsch   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);}
108*030f984aSJacob Faibussowitsch   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);}
109*030f984aSJacob Faibussowitsch   cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr);
110bf0cc555SLisandro Dalcin   ierr = PetscFree(A->data);CHKERRQ(ierr);
111bf0cc555SLisandro Dalcin   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
112e133240eSMatthew G Knepley   PetscFunctionReturn(0);
113e133240eSMatthew G Knepley }
114e133240eSMatthew G Knepley 
115e133240eSMatthew G Knepley /*@
116e133240eSMatthew G Knepley   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
117e133240eSMatthew G Knepley 
118d083f849SBarry Smith   Collective
119e133240eSMatthew G Knepley 
120e133240eSMatthew G Knepley   Input Parameters:
121e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF
122e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform
123e133240eSMatthew G Knepley - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
124e133240eSMatthew G Knepley 
125e133240eSMatthew G Knepley   Output Parameter:
126e133240eSMatthew G Knepley . A - the matrix
127e133240eSMatthew G Knepley 
128e133240eSMatthew G Knepley   Options Database Keys:
129e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags
130e133240eSMatthew G Knepley 
131e133240eSMatthew G Knepley   Level: intermediate
132e133240eSMatthew G Knepley @*/
1337087cfbeSBarry Smith PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
134e133240eSMatthew G Knepley {
135e133240eSMatthew G Knepley   Mat_CUFFT      *cufft;
136e133240eSMatthew G Knepley   PetscInt       m, d;
137e133240eSMatthew G Knepley   PetscErrorCode ierr;
138*030f984aSJacob Faibussowitsch   cudaError_t    cerr;
139e133240eSMatthew G Knepley 
140e133240eSMatthew G Knepley   PetscFunctionBegin;
141e133240eSMatthew G Knepley   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
142e133240eSMatthew G Knepley   ierr = MatCreate(comm, A);CHKERRQ(ierr);
143e133240eSMatthew G Knepley   m    = 1;
144e133240eSMatthew G Knepley   for (d = 0; d < ndim; ++d) {
145e133240eSMatthew G Knepley     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
146e133240eSMatthew G Knepley     m *= dim[d];
147e133240eSMatthew G Knepley   }
148e133240eSMatthew G Knepley   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
149e133240eSMatthew G Knepley   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
150e133240eSMatthew G Knepley 
151b00a9115SJed Brown   ierr       = PetscNewLog(*A,&cufft);CHKERRQ(ierr);
152e133240eSMatthew G Knepley   (*A)->data = (void*) cufft;
153854ce69bSBarry Smith   ierr       = PetscMalloc1(ndim+1, &cufft->dim);CHKERRQ(ierr);
154580bdb30SBarry Smith   ierr       = PetscArraycpy(cufft->dim, dim, ndim);CHKERRQ(ierr);
1552205254eSKarl Rupp 
156e133240eSMatthew G Knepley   cufft->ndim       = ndim;
157e133240eSMatthew G Knepley   cufft->p_forward  = 0;
158e133240eSMatthew G Knepley   cufft->p_backward = 0;
159e133240eSMatthew G Knepley   cufft->dim[ndim]  = m;
160e133240eSMatthew G Knepley 
161e133240eSMatthew G Knepley   /* GPU memory allocation */
162*030f984aSJacob Faibussowitsch   cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr);
163e133240eSMatthew G Knepley 
164e133240eSMatthew G Knepley   (*A)->ops->mult          = MatMult_SeqCUFFT;
165e133240eSMatthew G Knepley   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
166e133240eSMatthew G Knepley   (*A)->assembled          = PETSC_TRUE;
167e133240eSMatthew G Knepley   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
168e133240eSMatthew G Knepley 
169e133240eSMatthew G Knepley   /* get runtime options */
170e133240eSMatthew G Knepley   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
171e133240eSMatthew G Knepley   ierr = PetscOptionsEnd();CHKERRQ(ierr);
172e133240eSMatthew G Knepley   PetscFunctionReturn(0);
173e133240eSMatthew G Knepley }
174