xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision a6b72b0113fd9afdd9f1d31d300bc7110ef3b3aa)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatMult_SeqFFTW"
30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31 {
32   PetscErrorCode ierr;
33   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35   PetscScalar    *x_array,*y_array;
36   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37 
38   PetscFunctionBegin;
39 #if !defined(PETSC_USE_COMPLEX)
40 
41   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42 #endif
43   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45   if (!fftw->p_forward){ /* create a plan, then excute it */
46     switch (ndim){
47     case 1:
48       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49       break;
50     case 2:
51       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52       break;
53     case 3:
54       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55       break;
56     default:
57       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58       break;
59     }
60     fftw->finarray  = x_array;
61     fftw->foutarray = y_array;
62     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64     fftw_execute(fftw->p_forward);
65   } else { /* use existing plan */
66     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68     } else {
69       fftw_execute(fftw->p_forward);
70     }
71   }
72   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80 {
81   PetscErrorCode ierr;
82   Mat_FFT        *fft = (Mat_FFT*)A->data;
83   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84   PetscScalar    *x_array,*y_array;
85   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86 
87   PetscFunctionBegin;
88 #if !defined(PETSC_USE_COMPLEX)
89   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90 #endif
91   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93   if (!fftw->p_backward){ /* create a plan, then excute it */
94     switch (ndim){
95     case 1:
96       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97       break;
98     case 2:
99       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100       break;
101     case 3:
102       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103       break;
104     default:
105       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106       break;
107     }
108     fftw->binarray  = x_array;
109     fftw->boutarray = y_array;
110     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111   } else { /* use existing plan */
112     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114     } else {
115       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116     }
117   }
118   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatMult_MPIFFTW"
125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126 {
127   PetscErrorCode ierr;
128   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130   PetscScalar    *x_array,*y_array;
131   PetscInt       ndim=fft->ndim,*dim=fft->dim;
132   MPI_Comm       comm=((PetscObject)A)->comm;
133 // PetscInt ctr;
134 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
135 //  ndim1=(ptrdiff_t) ndim;
136 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137 
138 //  for(ctr=0;ctr<ndim;ctr++)
139 //     {
140 //      pdim[ctr] = dim[ctr];
141 //     }
142 
143   PetscFunctionBegin;
144 #if !defined(PETSC_USE_COMPLEX)
145   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
146 #endif
147 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
148 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
149 
150   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
151   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
152   if (!fftw->p_forward){ /* create a plan, then excute it */
153     switch (ndim){
154     case 1:
155       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
156       break;
157     case 2:
158       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
159       break;
160     case 3:
161       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
162       break;
163     default:
164       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
165  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
166       break;
167     }
168     fftw->finarray  = x_array;
169     fftw->foutarray = y_array;
170     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
171                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
172     fftw_execute(fftw->p_forward);
173   } else { /* use existing plan */
174     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
175       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
176     } else {
177       fftw_execute(fftw->p_forward);
178     }
179   }
180   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
181   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
187 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
188 {
189   PetscErrorCode ierr;
190   Mat_FFT        *fft  = (Mat_FFT*)A->data;
191   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
192   PetscScalar    *x_array,*y_array;
193   PetscInt       ndim=fft->ndim,*dim=fft->dim;
194   MPI_Comm       comm=((PetscObject)A)->comm;
195 //  PetscInt       ctr;
196 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
197 //  ndim1=(ptrdiff_t) ndim;
198 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
199 
200 //  for(ctr=0;ctr<ndim;ctr++)
201 //     {
202 //      pdim[ctr] = dim[ctr];
203 //     }
204 
205   PetscFunctionBegin;
206 #if !defined(PETSC_USE_COMPLEX)
207   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
208 #endif
209 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
210 // should pdim be a member of Mat_FFTW?
211 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
212 
213   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
214   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
215   if (!fftw->p_backward){ /* create a plan, then excute it */
216     switch (ndim){
217     case 1:
218       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
219       break;
220     case 2:
221       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
222       break;
223     case 3:
224       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
225       break;
226     default:
227       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
228 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
229       break;
230     }
231     fftw->binarray  = x_array;
232     fftw->boutarray = y_array;
233     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
234   } else { /* use existing plan */
235     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
236       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
237     } else {
238       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
239     }
240   }
241   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
242   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatDestroy_FFTW"
248 PetscErrorCode MatDestroy_FFTW(Mat A)
249 {
250   Mat_FFT        *fft = (Mat_FFT*)A->data;
251   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255 #if !defined(PETSC_USE_COMPLEX)
256   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
257 #endif
258   fftw_destroy_plan(fftw->p_forward);
259   fftw_destroy_plan(fftw->p_backward);
260   ierr = PetscFree(fft->data);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
265 #undef __FUNCT__
266 #define __FUNCT__ "VecDestroy_MPIFFTW"
267 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
268 {
269   PetscErrorCode  ierr;
270   PetscScalar     *array;
271 
272   PetscFunctionBegin;
273 #if !defined(PETSC_USE_COMPLEX)
274   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
275 #endif
276   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
277   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
278   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
279   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatGetVecs_FFTW1D"
285 /*
286    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
287      parallel layout determined by FFTW-1D
288 
289 */
290 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
291 {
292   PetscErrorCode ierr;
293   PetscMPIInt    size,rank;
294   MPI_Comm       comm=((PetscObject)A)->comm;
295   Mat_FFT        *fft = (Mat_FFT*)A->data;
296 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297   PetscInt       N=fft->N;
298   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
300   ptrdiff_t      f_local_n1,f_local_1_end;
301   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
302   ptrdiff_t      b_local_n1,b_local_1_end;
303   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
304 
305   PetscFunctionBegin;
306 #if !defined(PETSC_USE_COMPLEX)
307   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
308 #endif
309   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
310   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
311   if (size == 1){
312     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
313   }
314   else {
315       if (ndim>1){
316         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
317       else {
318           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
319           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
320           if (fin) {
321             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
322             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
323             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
324           }
325           if (fout) {
326             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
327             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
328             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
329           }
330           if (bin) {
331             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
332             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
333             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
334           }
335           if (bout) {
336             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
337             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
338             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
339           }
340   }
341   if (fin){
342     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
343   }
344   if (fout){
345     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
346   }
347   if (bin){
348     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
349   }
350   if (bout){
351     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 
357 
358 
359 
360 
361 
362 }
363 #undef __FUNCT__
364 #define __FUNCT__ "MatGetVecs_FFTW"
365 /*
366    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
367      parallel layout determined by FFTW
368 
369    Collective on Mat
370 
371    Input Parameter:
372 .  mat - the matrix
373 
374    Output Parameter:
375 +   fin - (optional) input vector of forward FFTW
376 -   fout - (optional) output vector of forward FFTW
377 
378   Level: advanced
379 
380 .seealso: MatCreateFFTW()
381 */
382 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
383 {
384   PetscErrorCode ierr;
385   PetscMPIInt    size,rank;
386   MPI_Comm       comm=((PetscObject)A)->comm;
387   Mat_FFT        *fft = (Mat_FFT*)A->data;
388   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
389   PetscInt       N=fft->N;
390 
391   PetscFunctionBegin;
392 #if !defined(PETSC_USE_COMPLEX)
393   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
394 #endif
395   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
396   PetscValidType(A,1);
397 
398   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
399   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
400   if (size == 1){ /* sequential case */
401     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
402     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
403   } else {        /* mpi case */
404     ptrdiff_t      alloc_local,local_n0,local_0_start;
405     ptrdiff_t      local_n1,local_1_end;
406     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
407     fftw_complex   *data_fin,*data_fout;
408 //    PetscInt ctr;
409 //    ptrdiff_t      ndim1,*pdim;
410 //    ndim1=(ptrdiff_t) ndim;
411 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
412 
413 //    for(ctr=0;ctr<ndim;ctr++)
414 //        {
415 //           pdim[ctr] = dim[ctr];
416 //       }
417 
418     switch (ndim){
419     case 1:
420       /* Get local size */
421       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
422 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
423 
424       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
425       if (fin) {
426         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
427         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
428         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
429       }
430       if (fout) {
431         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
432         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
433         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
434       }
435       break;
436     case 2:
437       /* Get local size */
438       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
439       if (fin) {
440         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
441         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
442         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
443       }
444       if (fout) {
445         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
446         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
447         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
448       }
449       break;
450     case 3:
451       /* Get local size */
452       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
453 //      printf("The quantity n is %d",n);
454       if (fin) {
455         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
456         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
457         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
458       }
459       if (fout) {
460         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
461         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
462         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
463       }
464       break;
465     default:
466       /* Get local size */
467       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
468 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
469 //      printf("The value of alloc local is %d",alloc_local);
470 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
471 //      for(i=0;i<ndim;i++)
472 //         {
473 //          pdim[i]=dim[i];printf("%d",pdim[i]);
474 //         }
475 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
476 //      printf("The quantity n is %d",n);
477       if (fin) {
478         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
480         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
481       }
482       if (fout) {
483         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
484         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
485         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
486       }
487       break;
488     }
489   }
490   if (fin){
491     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
492   }
493   if (fout){
494     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
495   }
496   PetscFunctionReturn(0);
497 }
498 
499 EXTERN_C_BEGIN
500 #undef __FUNCT__
501 #define __FUNCT__ "MatCreate_FFTW"
502 /*
503       MatCreate_FFTW - Creates a matrix object that provides FFT
504   via the external package FFTW
505 
506   Options Database Keys:
507 + -mat_fftw_plannerflags - set FFTW planner flags
508 
509    Level: intermediate
510 
511 */
512 PetscErrorCode MatCreate_FFTW(Mat A)
513 {
514   PetscErrorCode ierr;
515   MPI_Comm       comm=((PetscObject)A)->comm;
516   Mat_FFT        *fft=(Mat_FFT*)A->data;
517   Mat_FFTW       *fftw;
518   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
519   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
520   PetscBool      flg;
521   PetscInt       p_flag,partial_dim=1,ctr;
522   PetscMPIInt    size,rank;
523   ptrdiff_t      *pdim;
524 
525   PetscFunctionBegin;
526 #if !defined(PETSC_USE_COMPLEX)
527   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
528 #endif
529 
530   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
531   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
532 
533   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
534   pdim[0] = dim[0];
535   for(ctr=1;ctr<ndim;ctr++)
536       {
537           partial_dim*= dim[ctr];
538           pdim[ctr] = dim[ctr];
539       }
540 //  printf("partial dimension is %d",partial_dim);
541   if (size == 1) {
542     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
543     n = N;
544   } else {
545     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
546     switch (ndim){
547     case 1:
548       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
549       n = (PetscInt)local_n0;
550       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
551  //     PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D);
552       break;
553     case 2:
554       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
555       /*
556        PetscMPIInt    rank;
557        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
558        PetscSynchronizedFlush(comm);
559        */
560       n = (PetscInt)local_n0*dim[1];
561       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
562       break;
563     case 3:
564       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
565 //      printf("The value of alloc local is %d",alloc_local);
566       n = (PetscInt)local_n0*dim[1]*dim[2];
567       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
568       break;
569     default:
570       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
571 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
572 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
573       n = (PetscInt)local_n0*partial_dim;
574 //      printf("New partial dimension is %d %d %d",n,N,ndim);
575       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
576       break;
577     }
578   }
579   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
580 
581   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
582   fft->data = (void*)fftw;
583 
584   fft->n           = n;
585   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
586   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
587 //  (fftw->dim_fftw)=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
588   for(ctr=0;ctr<ndim;ctr++)
589       {
590           (fftw->dim_fftw)[ctr]=dim[ctr];
591       }
592 
593   fftw->p_forward  = 0;
594   fftw->p_backward = 0;
595   fftw->p_flag     = FFTW_ESTIMATE;
596 
597   if (size == 1){
598     A->ops->mult          = MatMult_SeqFFTW;
599     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
600   } else {
601     A->ops->mult          = MatMult_MPIFFTW;
602     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
603   }
604   fft->matdestroy          = MatDestroy_FFTW;
605   A->ops->getvecs       = MatGetVecs_FFTW;
606   A->assembled          = PETSC_TRUE;
607 
608   /* get runtime options */
609   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
610     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
611     if (flg) {fftw->p_flag = (unsigned)p_flag;}
612   PetscOptionsEnd();
613   PetscFunctionReturn(0);
614 }
615 EXTERN_C_END
616