xref: /petsc/src/mat/impls/cufft/cufft.cu (revision e133240ea09ca21a17d4f666767adae01b3f0ffe)
1*e133240eSMatthew G Knepley #define PETSCMAT_DLL
2*e133240eSMatthew G Knepley 
3*e133240eSMatthew G Knepley /*
4*e133240eSMatthew G Knepley     Provides an interface to the CUFFT package.
5*e133240eSMatthew G Knepley     Testing examples can be found in ~src/mat/examples/tests
6*e133240eSMatthew G Knepley */
7*e133240eSMatthew G Knepley 
8*e133240eSMatthew G Knepley #include "private/matimpl.h"          /*I "petscmat.h" I*/
9*e133240eSMatthew G Knepley EXTERN_C_BEGIN
10*e133240eSMatthew G Knepley #include <cuda.h>
11*e133240eSMatthew G Knepley #include <cuda_runtime.h>
12*e133240eSMatthew G Knepley #include <cufft.h>
13*e133240eSMatthew G Knepley EXTERN_C_END
14*e133240eSMatthew G Knepley 
15*e133240eSMatthew G Knepley typedef struct {
16*e133240eSMatthew G Knepley   PetscInt      ndim;
17*e133240eSMatthew G Knepley   PetscInt     *dim;
18*e133240eSMatthew G Knepley   cufftHandle   p_forward, p_backward;
19*e133240eSMatthew G Knepley   cufftComplex *devArray;
20*e133240eSMatthew G Knepley } Mat_CUFFT;
21*e133240eSMatthew G Knepley 
22*e133240eSMatthew G Knepley #undef __FUNCT__
23*e133240eSMatthew G Knepley #define __FUNCT__ "MatMult_SeqCUFFT"
24*e133240eSMatthew G Knepley PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
25*e133240eSMatthew G Knepley {
26*e133240eSMatthew G Knepley   Mat_CUFFT     *cufft    = (Mat_CUFFT *) A->data;
27*e133240eSMatthew G Knepley   cufftComplex  *devArray = cufft->devArray;
28*e133240eSMatthew G Knepley   PetscInt       ndim     = cufft->ndim, *dim = cufft->dim;
29*e133240eSMatthew G Knepley   PetscScalar   *x_array, *y_array;
30*e133240eSMatthew G Knepley   cufftResult    result;
31*e133240eSMatthew G Knepley   PetscErrorCode ierr;
32*e133240eSMatthew G Knepley 
33*e133240eSMatthew G Knepley   PetscFunctionBegin;
34*e133240eSMatthew G Knepley   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
35*e133240eSMatthew G Knepley   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
36*e133240eSMatthew G Knepley   if (!cufft->p_forward) {
37*e133240eSMatthew G Knepley     cufftResult result;
38*e133240eSMatthew G Knepley     /* create a plan, then execute it */
39*e133240eSMatthew G Knepley     switch(ndim) {
40*e133240eSMatthew G Knepley     case 1:
41*e133240eSMatthew G Knepley       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
42*e133240eSMatthew G Knepley       break;
43*e133240eSMatthew G Knepley     case 2:
44*e133240eSMatthew G Knepley       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
45*e133240eSMatthew G Knepley       break;
46*e133240eSMatthew G Knepley     case 3:
47*e133240eSMatthew G Knepley       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
48*e133240eSMatthew G Knepley       break;
49*e133240eSMatthew G Knepley     default:
50*e133240eSMatthew G Knepley       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
51*e133240eSMatthew G Knepley     }
52*e133240eSMatthew G Knepley   }
53*e133240eSMatthew G Knepley   /* transfer to GPU memory */
54*e133240eSMatthew G Knepley   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
55*e133240eSMatthew G Knepley   /* execute transform */
56*e133240eSMatthew G Knepley   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
57*e133240eSMatthew G Knepley   /* transfer from GPU memory */
58*e133240eSMatthew G Knepley   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
59*e133240eSMatthew G Knepley   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
60*e133240eSMatthew G Knepley   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
61*e133240eSMatthew G Knepley   PetscFunctionReturn(0);
62*e133240eSMatthew G Knepley }
63*e133240eSMatthew G Knepley 
64*e133240eSMatthew G Knepley #undef __FUNCT__
65*e133240eSMatthew G Knepley #define __FUNCT__ "MatMultTranspose_SeqCUFFT"
66*e133240eSMatthew G Knepley PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
67*e133240eSMatthew G Knepley {
68*e133240eSMatthew G Knepley   Mat_CUFFT     *cufft    = (Mat_CUFFT *) A->data;
69*e133240eSMatthew G Knepley   cufftComplex  *devArray = cufft->devArray;
70*e133240eSMatthew G Knepley   PetscInt       ndim     = cufft->ndim, *dim = cufft->dim;
71*e133240eSMatthew G Knepley   PetscScalar   *x_array, *y_array;
72*e133240eSMatthew G Knepley   cufftResult    result;
73*e133240eSMatthew G Knepley   PetscErrorCode ierr;
74*e133240eSMatthew G Knepley 
75*e133240eSMatthew G Knepley   PetscFunctionBegin;
76*e133240eSMatthew G Knepley   ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
77*e133240eSMatthew G Knepley   ierr = VecGetArray(y, &y_array);CHKERRQ(ierr);
78*e133240eSMatthew G Knepley   if (!cufft->p_backward) {
79*e133240eSMatthew G Knepley     /* create a plan, then execute it */
80*e133240eSMatthew G Knepley     switch(ndim) {
81*e133240eSMatthew G Knepley     case 1:
82*e133240eSMatthew G Knepley       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
83*e133240eSMatthew G Knepley       break;
84*e133240eSMatthew G Knepley     case 2:
85*e133240eSMatthew G Knepley       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
86*e133240eSMatthew G Knepley       break;
87*e133240eSMatthew G Knepley     case 3:
88*e133240eSMatthew G Knepley       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
89*e133240eSMatthew G Knepley       break;
90*e133240eSMatthew G Knepley     default:
91*e133240eSMatthew G Knepley       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
92*e133240eSMatthew G Knepley     }
93*e133240eSMatthew G Knepley   }
94*e133240eSMatthew G Knepley   /* transfer to GPU memory */
95*e133240eSMatthew G Knepley   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
96*e133240eSMatthew G Knepley   /* execute transform */
97*e133240eSMatthew G Knepley   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
98*e133240eSMatthew G Knepley   /* transfer from GPU memory */
99*e133240eSMatthew G Knepley   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
100*e133240eSMatthew G Knepley   ierr = VecRestoreArray(y, &y_array);CHKERRQ(ierr);
101*e133240eSMatthew G Knepley   ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
102*e133240eSMatthew G Knepley   PetscFunctionReturn(0);
103*e133240eSMatthew G Knepley }
104*e133240eSMatthew G Knepley 
105*e133240eSMatthew G Knepley #undef __FUNCT__
106*e133240eSMatthew G Knepley #define __FUNCT__ "MatDestroy_SeqCUFFT"
107*e133240eSMatthew G Knepley PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
108*e133240eSMatthew G Knepley {
109*e133240eSMatthew G Knepley   Mat_CUFFT     *cufft = (Mat_CUFFT *) A->data;
110*e133240eSMatthew G Knepley   cufftResult    result;
111*e133240eSMatthew G Knepley   PetscErrorCode ierr;
112*e133240eSMatthew G Knepley 
113*e133240eSMatthew G Knepley   PetscFunctionBegin;
114*e133240eSMatthew G Knepley   ierr = PetscFree(cufft->dim);CHKERRQ(ierr);
115*e133240eSMatthew G Knepley   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
116*e133240eSMatthew G Knepley   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
117*e133240eSMatthew G Knepley   cudaFree(cufft->devArray);
118*e133240eSMatthew G Knepley   ierr = PetscFree(cufft);CHKERRQ(ierr);
119*e133240eSMatthew G Knepley   ierr = PetscObjectChangeTypeName((PetscObject) A, PETSC_NULL);CHKERRQ(ierr);
120*e133240eSMatthew G Knepley   PetscFunctionReturn(0);
121*e133240eSMatthew G Knepley }
122*e133240eSMatthew G Knepley 
123*e133240eSMatthew G Knepley #undef __FUNCT__
124*e133240eSMatthew G Knepley #define __FUNCT__ "MatCreateSeqCUFFT"
125*e133240eSMatthew G Knepley /*@
126*e133240eSMatthew G Knepley   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
127*e133240eSMatthew G Knepley 
128*e133240eSMatthew G Knepley   Collective on MPI_Comm
129*e133240eSMatthew G Knepley 
130*e133240eSMatthew G Knepley   Input Parameters:
131*e133240eSMatthew G Knepley + comm - MPI communicator, set to PETSC_COMM_SELF
132*e133240eSMatthew G Knepley . ndim - the ndim-dimensional transform
133*e133240eSMatthew G Knepley - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension
134*e133240eSMatthew G Knepley 
135*e133240eSMatthew G Knepley   Output Parameter:
136*e133240eSMatthew G Knepley . A - the matrix
137*e133240eSMatthew G Knepley 
138*e133240eSMatthew G Knepley   Options Database Keys:
139*e133240eSMatthew G Knepley . -mat_cufft_plannerflags - set CUFFT planner flags
140*e133240eSMatthew G Knepley 
141*e133240eSMatthew G Knepley   Level: intermediate
142*e133240eSMatthew G Knepley @*/
143*e133240eSMatthew G Knepley PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat* A)
144*e133240eSMatthew G Knepley {
145*e133240eSMatthew G Knepley   Mat_CUFFT     *cufft;
146*e133240eSMatthew G Knepley   PetscInt       m, d;
147*e133240eSMatthew G Knepley   PetscErrorCode ierr;
148*e133240eSMatthew G Knepley 
149*e133240eSMatthew G Knepley   PetscFunctionBegin;
150*e133240eSMatthew G Knepley   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
151*e133240eSMatthew G Knepley   ierr = MatCreate(comm, A);CHKERRQ(ierr);
152*e133240eSMatthew G Knepley   m = 1;
153*e133240eSMatthew G Knepley   for(d = 0; d < ndim; ++d){
154*e133240eSMatthew G Knepley     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
155*e133240eSMatthew G Knepley     m *= dim[d];
156*e133240eSMatthew G Knepley   }
157*e133240eSMatthew G Knepley   ierr = MatSetSizes(*A, m, m, m, m);CHKERRQ(ierr);
158*e133240eSMatthew G Knepley   ierr = PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);CHKERRQ(ierr);
159*e133240eSMatthew G Knepley 
160*e133240eSMatthew G Knepley   ierr = PetscNewLog(*A, Mat_CUFFT, &cufft);CHKERRQ(ierr);
161*e133240eSMatthew G Knepley   (*A)->data = (void*) cufft;
162*e133240eSMatthew G Knepley   ierr = PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);CHKERRQ(ierr);
163*e133240eSMatthew G Knepley   ierr = PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));CHKERRQ(ierr);
164*e133240eSMatthew G Knepley   cufft->ndim       = ndim;
165*e133240eSMatthew G Knepley   cufft->p_forward  = 0;
166*e133240eSMatthew G Knepley   cufft->p_backward = 0;
167*e133240eSMatthew G Knepley   cufft->dim[ndim]  = m;
168*e133240eSMatthew G Knepley 
169*e133240eSMatthew G Knepley   /* GPU memory allocation */
170*e133240eSMatthew G Knepley   cudaMalloc((void **) &cufft->devArray, sizeof(cufftComplex)*m);
171*e133240eSMatthew G Knepley 
172*e133240eSMatthew G Knepley   (*A)->ops->mult          = MatMult_SeqCUFFT;
173*e133240eSMatthew G Knepley   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
174*e133240eSMatthew G Knepley   (*A)->assembled          = PETSC_TRUE;
175*e133240eSMatthew G Knepley   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;
176*e133240eSMatthew G Knepley 
177*e133240eSMatthew G Knepley   /* get runtime options */
178*e133240eSMatthew G Knepley   ierr = PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");CHKERRQ(ierr);
179*e133240eSMatthew G Knepley   ierr = PetscOptionsEnd();CHKERRQ(ierr);
180*e133240eSMatthew G Knepley   PetscFunctionReturn(0);
181*e133240eSMatthew G Knepley }
182