xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b2b77a04a644fb8949a04a8ba0dae8d616b7156d)
1*b2b77a04SHong Zhang #define PETSCMAT_DLL
2*b2b77a04SHong Zhang 
3*b2b77a04SHong Zhang /*
4*b2b77a04SHong Zhang     Provides an interface to the FFTW package.
5*b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
6*b2b77a04SHong Zhang */
7*b2b77a04SHong Zhang 
8*b2b77a04SHong Zhang #include "../src/mat/impls/fft/fft.h"   /*I "petscmat.h" I*/
9*b2b77a04SHong Zhang EXTERN_C_BEGIN
10*b2b77a04SHong Zhang #include "fftw3-mpi.h"
11*b2b77a04SHong Zhang EXTERN_C_END
12*b2b77a04SHong Zhang 
13*b2b77a04SHong Zhang typedef struct {
14*b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
15*b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16*b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17*b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
18*b2b77a04SHong Zhang } Mat_FFTW;
19*b2b77a04SHong Zhang 
20*b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21*b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22*b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23*b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24*b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
25*b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26*b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27*b2b77a04SHong Zhang 
28*b2b77a04SHong Zhang #undef __FUNCT__
29*b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
30*b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31*b2b77a04SHong Zhang {
32*b2b77a04SHong Zhang   PetscErrorCode ierr;
33*b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34*b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35*b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
36*b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37*b2b77a04SHong Zhang 
38*b2b77a04SHong Zhang   PetscFunctionBegin;
39*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
40*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
41*b2b77a04SHong Zhang #endif
42*b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
43*b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
44*b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
45*b2b77a04SHong Zhang     switch (ndim){
46*b2b77a04SHong Zhang     case 1:
47*b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
48*b2b77a04SHong Zhang       break;
49*b2b77a04SHong Zhang     case 2:
50*b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51*b2b77a04SHong Zhang       break;
52*b2b77a04SHong Zhang     case 3:
53*b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54*b2b77a04SHong Zhang       break;
55*b2b77a04SHong Zhang     default:
56*b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
57*b2b77a04SHong Zhang       break;
58*b2b77a04SHong Zhang     }
59*b2b77a04SHong Zhang     fftw->finarray  = x_array;
60*b2b77a04SHong Zhang     fftw->foutarray = y_array;
61*b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
62*b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
63*b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
64*b2b77a04SHong Zhang   } else { /* use existing plan */
65*b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
66*b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
67*b2b77a04SHong Zhang     } else {
68*b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
69*b2b77a04SHong Zhang     }
70*b2b77a04SHong Zhang   }
71*b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
72*b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
73*b2b77a04SHong Zhang   PetscFunctionReturn(0);
74*b2b77a04SHong Zhang }
75*b2b77a04SHong Zhang 
76*b2b77a04SHong Zhang #undef __FUNCT__
77*b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
78*b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
79*b2b77a04SHong Zhang {
80*b2b77a04SHong Zhang   PetscErrorCode ierr;
81*b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
82*b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
83*b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
84*b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
85*b2b77a04SHong Zhang 
86*b2b77a04SHong Zhang   PetscFunctionBegin;
87*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
88*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
89*b2b77a04SHong Zhang #endif
90*b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
91*b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
92*b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
93*b2b77a04SHong Zhang     switch (ndim){
94*b2b77a04SHong Zhang     case 1:
95*b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
96*b2b77a04SHong Zhang       break;
97*b2b77a04SHong Zhang     case 2:
98*b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
99*b2b77a04SHong Zhang       break;
100*b2b77a04SHong Zhang     case 3:
101*b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
102*b2b77a04SHong Zhang       break;
103*b2b77a04SHong Zhang     default:
104*b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
105*b2b77a04SHong Zhang       break;
106*b2b77a04SHong Zhang     }
107*b2b77a04SHong Zhang     fftw->binarray  = x_array;
108*b2b77a04SHong Zhang     fftw->boutarray = y_array;
109*b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
110*b2b77a04SHong Zhang   } else { /* use existing plan */
111*b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
112*b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
113*b2b77a04SHong Zhang     } else {
114*b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
115*b2b77a04SHong Zhang     }
116*b2b77a04SHong Zhang   }
117*b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
118*b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
119*b2b77a04SHong Zhang   PetscFunctionReturn(0);
120*b2b77a04SHong Zhang }
121*b2b77a04SHong Zhang 
122*b2b77a04SHong Zhang #undef __FUNCT__
123*b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
124*b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
125*b2b77a04SHong Zhang {
126*b2b77a04SHong Zhang   PetscErrorCode ierr;
127*b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
128*b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
129*b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
130*b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
131*b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
132*b2b77a04SHong Zhang 
133*b2b77a04SHong Zhang   PetscFunctionBegin;
134*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
135*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
136*b2b77a04SHong Zhang #endif
137*b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
138*b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
139*b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
140*b2b77a04SHong Zhang     switch (ndim){
141*b2b77a04SHong Zhang     case 1:
142*b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
143*b2b77a04SHong Zhang       break;
144*b2b77a04SHong Zhang     case 2:
145*b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
146*b2b77a04SHong Zhang       break;
147*b2b77a04SHong Zhang     case 3:
148*b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
149*b2b77a04SHong Zhang       break;
150*b2b77a04SHong Zhang     default:
151*b2b77a04SHong Zhang       /*
152*b2b77a04SHong Zhang        fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
153*b2b77a04SHong Zhang        */
154*b2b77a04SHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet");
155*b2b77a04SHong Zhang       break;
156*b2b77a04SHong Zhang     }
157*b2b77a04SHong Zhang     fftw->finarray  = x_array;
158*b2b77a04SHong Zhang     fftw->foutarray = y_array;
159*b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
160*b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
161*b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
162*b2b77a04SHong Zhang   } else { /* use existing plan */
163*b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
164*b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
165*b2b77a04SHong Zhang     } else {
166*b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
167*b2b77a04SHong Zhang     }
168*b2b77a04SHong Zhang   }
169*b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
170*b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
171*b2b77a04SHong Zhang   PetscFunctionReturn(0);
172*b2b77a04SHong Zhang }
173*b2b77a04SHong Zhang 
174*b2b77a04SHong Zhang #undef __FUNCT__
175*b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
176*b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
177*b2b77a04SHong Zhang {
178*b2b77a04SHong Zhang   PetscErrorCode ierr;
179*b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
180*b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
181*b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
182*b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
183*b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
184*b2b77a04SHong Zhang 
185*b2b77a04SHong Zhang   PetscFunctionBegin;
186*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
187*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
188*b2b77a04SHong Zhang #endif
189*b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
190*b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
191*b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
192*b2b77a04SHong Zhang     switch (ndim){
193*b2b77a04SHong Zhang     case 1:
194*b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
195*b2b77a04SHong Zhang       break;
196*b2b77a04SHong Zhang     case 2:
197*b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
198*b2b77a04SHong Zhang       break;
199*b2b77a04SHong Zhang     case 3:
200*b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
201*b2b77a04SHong Zhang       break;
202*b2b77a04SHong Zhang     default:
203*b2b77a04SHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet");
204*b2b77a04SHong Zhang       /* fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); */
205*b2b77a04SHong Zhang       break;
206*b2b77a04SHong Zhang     }
207*b2b77a04SHong Zhang     fftw->binarray  = x_array;
208*b2b77a04SHong Zhang     fftw->boutarray = y_array;
209*b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
210*b2b77a04SHong Zhang   } else { /* use existing plan */
211*b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
212*b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
213*b2b77a04SHong Zhang     } else {
214*b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
215*b2b77a04SHong Zhang     }
216*b2b77a04SHong Zhang   }
217*b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
218*b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
219*b2b77a04SHong Zhang   PetscFunctionReturn(0);
220*b2b77a04SHong Zhang }
221*b2b77a04SHong Zhang 
222*b2b77a04SHong Zhang #undef __FUNCT__
223*b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
224*b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
225*b2b77a04SHong Zhang {
226*b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
227*b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
228*b2b77a04SHong Zhang   PetscErrorCode ierr;
229*b2b77a04SHong Zhang 
230*b2b77a04SHong Zhang   PetscFunctionBegin;
231*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
232*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
233*b2b77a04SHong Zhang #endif
234*b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
235*b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
236*b2b77a04SHong Zhang   ierr = PetscFree(fftw);CHKERRQ(ierr);
237*b2b77a04SHong Zhang   PetscFunctionReturn(0);
238*b2b77a04SHong Zhang }
239*b2b77a04SHong Zhang 
240*b2b77a04SHong Zhang #include "../src/vec/vec/impls/mpi/pvecimpl.h"   /*I  "petscvec.h"   I*/
241*b2b77a04SHong Zhang #undef __FUNCT__
242*b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
243*b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
244*b2b77a04SHong Zhang {
245*b2b77a04SHong Zhang   PetscErrorCode  ierr;
246*b2b77a04SHong Zhang   PetscScalar     *array;
247*b2b77a04SHong Zhang 
248*b2b77a04SHong Zhang   PetscFunctionBegin;
249*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
250*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
251*b2b77a04SHong Zhang #endif
252*b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
253*b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
254*b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
255*b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
256*b2b77a04SHong Zhang   PetscFunctionReturn(0);
257*b2b77a04SHong Zhang }
258*b2b77a04SHong Zhang 
259*b2b77a04SHong Zhang #undef __FUNCT__
260*b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
261*b2b77a04SHong Zhang /*
262*b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
263*b2b77a04SHong Zhang      parallel layout determined by FFTW
264*b2b77a04SHong Zhang 
265*b2b77a04SHong Zhang    Collective on Mat
266*b2b77a04SHong Zhang 
267*b2b77a04SHong Zhang    Input Parameter:
268*b2b77a04SHong Zhang .  mat - the matrix
269*b2b77a04SHong Zhang 
270*b2b77a04SHong Zhang    Output Parameter:
271*b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
272*b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
273*b2b77a04SHong Zhang 
274*b2b77a04SHong Zhang   Level: advanced
275*b2b77a04SHong Zhang 
276*b2b77a04SHong Zhang .seealso: MatCreateFFTW()
277*b2b77a04SHong Zhang */
278*b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
279*b2b77a04SHong Zhang {
280*b2b77a04SHong Zhang   PetscErrorCode ierr;
281*b2b77a04SHong Zhang   PetscMPIInt    size,rank;
282*b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
283*b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
284*b2b77a04SHong Zhang   PetscInt       N=fft->N;
285*b2b77a04SHong Zhang 
286*b2b77a04SHong Zhang   PetscFunctionBegin;
287*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
288*b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
289*b2b77a04SHong Zhang #endif
290*b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
291*b2b77a04SHong Zhang   PetscValidType(A,1);
292*b2b77a04SHong Zhang 
293*b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
294*b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
295*b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
296*b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
297*b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
298*b2b77a04SHong Zhang   } else {        /* mpi case */
299*b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
300*b2b77a04SHong Zhang     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
301*b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
302*b2b77a04SHong Zhang 
303*b2b77a04SHong Zhang     switch (ndim){
304*b2b77a04SHong Zhang     case 1:
305*b2b77a04SHong Zhang       SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet");
306*b2b77a04SHong Zhang       break;
307*b2b77a04SHong Zhang     case 2:
308*b2b77a04SHong Zhang       /* Get local size */
309*b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
310*b2b77a04SHong Zhang       if (fin) {
311*b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
312*b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
313*b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
314*b2b77a04SHong Zhang       }
315*b2b77a04SHong Zhang       if (fout) {
316*b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
317*b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
318*b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
319*b2b77a04SHong Zhang       }
320*b2b77a04SHong Zhang       break;
321*b2b77a04SHong Zhang     case 3:
322*b2b77a04SHong Zhang       SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet");
323*b2b77a04SHong Zhang       break;
324*b2b77a04SHong Zhang     default:
325*b2b77a04SHong Zhang       SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet");
326*b2b77a04SHong Zhang       break;
327*b2b77a04SHong Zhang     }
328*b2b77a04SHong Zhang   }
329*b2b77a04SHong Zhang   PetscFunctionReturn(0);
330*b2b77a04SHong Zhang }
331*b2b77a04SHong Zhang 
332*b2b77a04SHong Zhang EXTERN_C_BEGIN
333*b2b77a04SHong Zhang #undef __FUNCT__
334*b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW"
335*b2b77a04SHong Zhang /*
336*b2b77a04SHong Zhang       MatCreate_FFTW - Creates a matrix object that provides FFT
337*b2b77a04SHong Zhang   via the external package FFTW
338*b2b77a04SHong Zhang 
339*b2b77a04SHong Zhang   Options Database Keys:
340*b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
341*b2b77a04SHong Zhang 
342*b2b77a04SHong Zhang    Level: intermediate
343*b2b77a04SHong Zhang 
344*b2b77a04SHong Zhang */
345*b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
346*b2b77a04SHong Zhang {
347*b2b77a04SHong Zhang   PetscErrorCode ierr;
348*b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
349*b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
350*b2b77a04SHong Zhang   Mat_FFTW       *fftw;
351*b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
352*b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
353*b2b77a04SHong Zhang   PetscBool      flg;
354*b2b77a04SHong Zhang   PetscInt       p_flag;
355*b2b77a04SHong Zhang   PetscMPIInt    size;
356*b2b77a04SHong Zhang 
357*b2b77a04SHong Zhang   PetscFunctionBegin;
358*b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
359*b2b77a04SHong Zhang   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
360*b2b77a04SHong Zhang #endif
361*b2b77a04SHong Zhang 
362*b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
363*b2b77a04SHong Zhang   //ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
364*b2b77a04SHong Zhang 
365*b2b77a04SHong Zhang 
366*b2b77a04SHong Zhang   if (size == 1) {
367*b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
368*b2b77a04SHong Zhang     n = N;
369*b2b77a04SHong Zhang   } else {
370*b2b77a04SHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start;
371*b2b77a04SHong Zhang     switch (ndim){
372*b2b77a04SHong Zhang     case 1:
373*b2b77a04SHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet");
374*b2b77a04SHong Zhang       break;
375*b2b77a04SHong Zhang     case 2:
376*b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
377*b2b77a04SHong Zhang       /*
378*b2b77a04SHong Zhang        PetscMPIInt    rank;
379*b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
380*b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
381*b2b77a04SHong Zhang        */
382*b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
383*b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
384*b2b77a04SHong Zhang       break;
385*b2b77a04SHong Zhang     case 3:
386*b2b77a04SHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet");
387*b2b77a04SHong Zhang       break;
388*b2b77a04SHong Zhang     default:
389*b2b77a04SHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet");
390*b2b77a04SHong Zhang       break;
391*b2b77a04SHong Zhang     }
392*b2b77a04SHong Zhang   }
393*b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
394*b2b77a04SHong Zhang 
395*b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
396*b2b77a04SHong Zhang   fft->data = (void*)fftw;
397*b2b77a04SHong Zhang 
398*b2b77a04SHong Zhang   fft->n           = n;
399*b2b77a04SHong Zhang   fftw->p_forward  = 0;
400*b2b77a04SHong Zhang   fftw->p_backward = 0;
401*b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
402*b2b77a04SHong Zhang 
403*b2b77a04SHong Zhang   if (size == 1){
404*b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
405*b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
406*b2b77a04SHong Zhang   } else {
407*b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
408*b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
409*b2b77a04SHong Zhang   }
410*b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
411*b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
412*b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
413*b2b77a04SHong Zhang 
414*b2b77a04SHong Zhang   /* get runtime options */
415*b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
416*b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
417*b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
418*b2b77a04SHong Zhang   PetscOptionsEnd();
419*b2b77a04SHong Zhang   PetscFunctionReturn(0);
420*b2b77a04SHong Zhang }
421*b2b77a04SHong Zhang EXTERN_C_END
422