xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision a31b9140012c9b931a23ec48babaf0b7eb4f4fd6)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private data of a MATFFTW matrix; hangs off Mat_FFT->data.  Caches the FFTW
   plans plus the array pointers the plans were created with. */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw; /* dimensions in FFTW's ptrdiff_t form, used by the MPI guru interface */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward,p_backward;
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
33 /* MatMult_SeqFFTW performs forward DFT in parallel
34    Input parameter:
35      A - the matrix
36      x - the vector on which FDFT will be performed
37 
38    Output parameter:
39      y - vector that stores result of FDFT
40 */
41 #undef __FUNCT__
42 #define __FUNCT__ "MatMult_SeqFFTW"
43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44 {
45   PetscErrorCode ierr;
46   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48   PetscScalar    *x_array,*y_array;
49 #if defined(PETSC_USE_COMPLEX)
50 #if defined(PETSC_USE_64BIT_INDICES)
51   fftw_iodim64   *iodims;
52 #else
53   fftw_iodim     *iodims;
54 #endif
55   PetscInt       i;
56 #endif
57   PetscInt       ndim = fft->ndim,*dim = fft->dim;
58 
59   PetscFunctionBegin;
60   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
61   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
62   if (!fftw->p_forward) { /* create a plan, then excute it */
63     switch (ndim) {
64     case 1:
65 #if defined(PETSC_USE_COMPLEX)
66       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67 #else
68       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69 #endif
70       break;
71     case 2:
72 #if defined(PETSC_USE_COMPLEX)
73       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74 #else
75       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
76 #endif
77       break;
78     case 3:
79 #if defined(PETSC_USE_COMPLEX)
80       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
81 #else
82       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83 #endif
84       break;
85     default:
86 #if defined(PETSC_USE_COMPLEX)
87       iodims = fftw->iodims;
88 #if defined(PETSC_USE_64BIT_INDICES)
89       if (ndim) {
90         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
91         iodims[ndim-1].is = iodims[ndim-1].os = 1;
92         for (i=ndim-2; i>=0; --i) {
93           iodims[i].n = (ptrdiff_t)dim[i];
94           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
95         }
96       }
97       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98 #else
99       if (ndim) {
100         iodims[ndim-1].n = (int)dim[ndim-1];
101         iodims[ndim-1].is = iodims[ndim-1].os = 1;
102         for (i=ndim-2; i>=0; --i) {
103           iodims[i].n = (int)dim[i];
104           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105         }
106       }
107       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108 #endif
109 
110 #else
111       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
112 #endif
113       break;
114     }
115     fftw->finarray  = x_array;
116     fftw->foutarray = y_array;
117     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
118                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
119     fftw_execute(fftw->p_forward);
120   } else { /* use existing plan */
121     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
123     } else {
124       fftw_execute(fftw->p_forward);
125     }
126   }
127   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
128   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /* MatMultTranspose_SeqFFTW performs serial backward DFT
133    Input parameter:
134      A - the matrix
135      x - the vector on which BDFT will be performed
136 
137    Output parameter:
138      y - vector that stores result of BDFT
139 */
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
143 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
144 {
145   PetscErrorCode ierr;
146   Mat_FFT        *fft  = (Mat_FFT*)A->data;
147   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
148   PetscScalar    *x_array,*y_array;
149   PetscInt       ndim=fft->ndim,*dim=fft->dim;
150 #if defined(PETSC_USE_COMPLEX)
151 #if defined(PETSC_USE_64BIT_INDICES)
152   fftw_iodim64   *iodims=fftw->iodims;
153 #else
154   fftw_iodim     *iodims=fftw->iodims;
155 #endif
156 #endif
157 
158   PetscFunctionBegin;
159   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
160   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
161   if (!fftw->p_backward) { /* create a plan, then excute it */
162     switch (ndim) {
163     case 1:
164 #if defined(PETSC_USE_COMPLEX)
165       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
166 #else
167       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
168 #endif
169       break;
170     case 2:
171 #if defined(PETSC_USE_COMPLEX)
172       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
173 #else
174       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
175 #endif
176       break;
177     case 3:
178 #if defined(PETSC_USE_COMPLEX)
179       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
180 #else
181       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
182 #endif
183       break;
184     default:
185 #if defined(PETSC_USE_COMPLEX)
186 #if defined(PETSC_USE_64BIT_INDICES)
187       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
188 #else
189       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190 #endif
191 #else
192       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
193 #endif
194       break;
195     }
196     fftw->binarray  = x_array;
197     fftw->boutarray = y_array;
198     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199   } else { /* use existing plan */
200     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 /* MatMult_MPIFFTW performs forward DFT in parallel
212    Input parameter:
213      A - the matrix
214      x - the vector on which FDFT will be performed
215 
216    Output parameter:
217    y   - vector that stores result of FDFT
218 */
219 #undef __FUNCT__
220 #define __FUNCT__ "MatMult_MPIFFTW"
221 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
222 {
223   PetscErrorCode ierr;
224   Mat_FFT        *fft  = (Mat_FFT*)A->data;
225   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
226   PetscScalar    *x_array,*y_array;
227   PetscInt       ndim=fft->ndim,*dim=fft->dim;
228   MPI_Comm       comm;
229 
230   PetscFunctionBegin;
231   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
232   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
233   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
234   if (!fftw->p_forward) { /* create a plan, then excute it */
235     switch (ndim) {
236     case 1:
237 #if defined(PETSC_USE_COMPLEX)
238       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
239 #else
240       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
241 #endif
242       break;
243     case 2:
244 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
245       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
246 #else
247       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
248 #endif
249       break;
250     case 3:
251 #if defined(PETSC_USE_COMPLEX)
252       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
253 #else
254       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
255 #endif
256       break;
257     default:
258 #if defined(PETSC_USE_COMPLEX)
259       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
260 #else
261       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
262 #endif
263       break;
264     }
265     fftw->finarray  = x_array;
266     fftw->foutarray = y_array;
267     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
268                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
269     fftw_execute(fftw->p_forward);
270   } else { /* use existing plan */
271     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
272       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
273     } else {
274       fftw_execute(fftw->p_forward);
275     }
276   }
277   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
278   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
283    Input parameter:
284      A - the matrix
285      x - the vector on which BDFT will be performed
286 
287    Output parameter:
288      y - vector that stores result of BDFT
289 */
290 #undef __FUNCT__
291 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
292 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
293 {
294   PetscErrorCode ierr;
295   Mat_FFT        *fft  = (Mat_FFT*)A->data;
296   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297   PetscScalar    *x_array,*y_array;
298   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299   MPI_Comm       comm;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
303   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
304   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
305   if (!fftw->p_backward) { /* create a plan, then excute it */
306     switch (ndim) {
307     case 1:
308 #if defined(PETSC_USE_COMPLEX)
309       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
310 #else
311       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
312 #endif
313       break;
314     case 2:
315 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
316       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
317 #else
318       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
319 #endif
320       break;
321     case 3:
322 #if defined(PETSC_USE_COMPLEX)
323       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
324 #else
325       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
326 #endif
327       break;
328     default:
329 #if defined(PETSC_USE_COMPLEX)
330       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
331 #else
332       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
333 #endif
334       break;
335     }
336     fftw->binarray  = x_array;
337     fftw->boutarray = y_array;
338     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339   } else { /* use existing plan */
340     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
341       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
342     } else {
343       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
344     }
345   }
346   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
347   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatDestroy_FFTW"
353 PetscErrorCode MatDestroy_FFTW(Mat A)
354 {
355   Mat_FFT        *fft  = (Mat_FFT*)A->data;
356   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   fftw_destroy_plan(fftw->p_forward);
361   fftw_destroy_plan(fftw->p_backward);
362   if (fftw->iodims) {
363     free(fftw->iodims);
364   }
365   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
366   ierr = PetscFree(fft->data);CHKERRQ(ierr);
367   fftw_mpi_cleanup();
368   PetscFunctionReturn(0);
369 }
370 
371 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
372 #undef __FUNCT__
373 #define __FUNCT__ "VecDestroy_MPIFFTW"
374 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375 {
376   PetscErrorCode ierr;
377   PetscScalar    *array;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW"
/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of the output of forward FFTW is always the same as the input
        of the backward FFTW. But the parallel layout of the input vector of forward
        FFTW might not be the same as the output of backward FFTW.
        Also note that we need to provide enough space while doing a parallel real transform.
        We need to pad extra zeros at the end of the last dimension. For this reason one needs to
        invoke the fftw_mpi_local_size_transposed routines. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on whether the last dimension is even or odd. If the last dimension is even one needs to pad two
        zeros; if it is odd only one zero is needed.
        Lastly, one needs some scratch space at the end of the data set on each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
428 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* MatGetVecsFFTW_FFTW - implementation behind MatGetVecsFFTW(): creates the
   forward-input (fin), forward-output (fout) and backward-output (bout) work
   vectors with the data layout FFTW expects.  Each output argument is optional
   and may be NULL.  In parallel the vectors wrap fftw_malloc'ed storage (which
   includes FFTW's per-process scratch space) and have VecDestroy_MPIFFTW
   installed so that storage is released with fftw_free(). */
PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
    /* complex: N entries; real: n entries (n includes the r2c padding computed elsewhere) */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D: forward and backward transforms may distribute data differently,
         so alloc_local is recomputed with FFTW_BACKWARD for bout below */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real 2D: last dimension padded to 2*(dim[1]/2+1) doubles for the r2c layout */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same padded r2c layout as the 2D case, applied to the last dimension */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* real N-D: temporarily replace the last dimension by dim/2+1 to query the
         r2c local size, then restore it so dim_fftw stays unchanged for planning */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
644 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
681 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
682 {
683   PetscErrorCode ierr;
684   MPI_Comm       comm;
685   Mat_FFT        *fft  = (Mat_FFT*)A->data;
686   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
687   PetscInt       N     =fft->N;
688   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
689   PetscInt       low;
690   PetscMPIInt    rank,size;
691   PetscInt       vsize,vsize1;
692   ptrdiff_t      local_n0,local_0_start;
693   ptrdiff_t      local_n1,local_1_start;
694   VecScatter     vecscat;
695   IS             list1,list2;
696 #if !defined(PETSC_USE_COMPLEX)
697   PetscInt       i,j,k,partial_dim;
698   PetscInt       *indx1, *indx2, tempindx, tempindx1;
699   PetscInt       NM;
700   ptrdiff_t      temp;
701 #endif
702 
703   PetscFunctionBegin;
704   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
705   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
706   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
707   ierr = VecGetOwnershipRange(y,&low,NULL);
708 
709   if (size==1) {
710     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
711     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
712     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
713     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
714     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
717     ierr = ISDestroy(&list1);CHKERRQ(ierr);
718   } else {
719     switch (ndim) {
720     case 1:
721 #if defined(PETSC_USE_COMPLEX)
722       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
723 
724       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
725       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
726       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
727       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
730       ierr = ISDestroy(&list1);CHKERRQ(ierr);
731       ierr = ISDestroy(&list2);CHKERRQ(ierr);
732 #else
733       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
734 #endif
735       break;
736     case 2:
737 #if defined(PETSC_USE_COMPLEX)
738       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
739 
740       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
741       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
742       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
743       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
745       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
746       ierr = ISDestroy(&list1);CHKERRQ(ierr);
747       ierr = ISDestroy(&list2);CHKERRQ(ierr);
748 #else
749       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
750 
751       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
752       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
753 
754       if (dim[1]%2==0) {
755         NM = dim[1]+2;
756       } else {
757         NM = dim[1]+1;
758       }
759       for (i=0; i<local_n0; i++) {
760         for (j=0; j<dim[1]; j++) {
761           tempindx  = i*dim[1] + j;
762           tempindx1 = i*NM + j;
763 
764           indx1[tempindx]=local_0_start*dim[1]+tempindx;
765           indx2[tempindx]=low+tempindx1;
766         }
767       }
768 
769       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
770       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
771 
772       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
773       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
774       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
776       ierr = ISDestroy(&list1);CHKERRQ(ierr);
777       ierr = ISDestroy(&list2);CHKERRQ(ierr);
778       ierr = PetscFree(indx1);CHKERRQ(ierr);
779       ierr = PetscFree(indx2);CHKERRQ(ierr);
780 #endif
781       break;
782 
783     case 3:
784 #if defined(PETSC_USE_COMPLEX)
785       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
786 
787       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
788       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
789       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
790       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
791       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
793       ierr = ISDestroy(&list1);CHKERRQ(ierr);
794       ierr = ISDestroy(&list2);CHKERRQ(ierr);
795 #else
796       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
797 
798       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
799       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
800 
801       if (dim[2]%2==0) NM = dim[2]+2;
802       else             NM = dim[2]+1;
803 
804       for (i=0; i<local_n0; i++) {
805         for (j=0; j<dim[1]; j++) {
806           for (k=0; k<dim[2]; k++) {
807             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
808             tempindx1 = i*dim[1]*NM + j*NM + k;
809 
810             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
811             indx2[tempindx]=low+tempindx1;
812           }
813         }
814       }
815 
816       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
817       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
818       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
819       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
822       ierr = ISDestroy(&list1);CHKERRQ(ierr);
823       ierr = ISDestroy(&list2);CHKERRQ(ierr);
824       ierr = PetscFree(indx1);CHKERRQ(ierr);
825       ierr = PetscFree(indx2);CHKERRQ(ierr);
826 #endif
827       break;
828 
829     default:
830 #if defined(PETSC_USE_COMPLEX)
831       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
832 
833       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
834       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
835       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
836       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
837       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
839       ierr = ISDestroy(&list1);CHKERRQ(ierr);
840       ierr = ISDestroy(&list2);CHKERRQ(ierr);
841 #else
842       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
843 
844       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
845 
846       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
847 
848       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
849 
850       partial_dim = fftw->partial_dim;
851 
852       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
853       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
854 
855       if (dim[ndim-1]%2==0) NM = 2;
856       else                  NM = 1;
857 
858       j = low;
859       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
860         indx1[i] = local_0_start*partial_dim + i;
861         indx2[i] = j;
862         if (k%dim[ndim-1]==0) j+=NM;
863         j++;
864       }
865       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
866       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
867       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
868       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
871       ierr = ISDestroy(&list1);CHKERRQ(ierr);
872       ierr = ISDestroy(&list2);CHKERRQ(ierr);
873       ierr = PetscFree(indx1);CHKERRQ(ierr);
874       ierr = PetscFree(indx2);CHKERRQ(ierr);
875 #endif
876       break;
877     }
878   }
879   PetscFunctionReturn(0);
880 }
881 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts the FFTW-layout output vector (e.g. the result of a
   backward DFT) into a PETSc vector with the natural parallel layout.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
/* Thin dispatcher: PetscTryMethod forwards to the implementation composed on A under
   "VecScatterFFTWToPetsc_C" (VecScatterFFTWToPetsc_FFTW for MATFFTW) and succeeds
   silently if no such method is composed. */
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
914 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
915 {
916   PetscErrorCode ierr;
917   MPI_Comm       comm;
918   Mat_FFT        *fft  = (Mat_FFT*)A->data;
919   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
920   PetscInt       N     = fft->N;
921   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
922   PetscInt       low;
923   PetscMPIInt    rank,size;
924   ptrdiff_t      local_n0,local_0_start;
925   ptrdiff_t      local_n1,local_1_start;
926   VecScatter     vecscat;
927   IS             list1,list2;
928 #if !defined(PETSC_USE_COMPLEX)
929   PetscInt       i,j,k,partial_dim;
930   PetscInt       *indx1, *indx2, tempindx, tempindx1;
931   PetscInt       NM;
932   ptrdiff_t      temp;
933 #endif
934 
935   PetscFunctionBegin;
936   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
937   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
938   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
939   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
940 
941   if (size==1) {
942     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
943     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
944     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
947     ierr = ISDestroy(&list1);CHKERRQ(ierr);
948 
949   } else {
950     switch (ndim) {
951     case 1:
952 #if defined(PETSC_USE_COMPLEX)
953       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
954 
955       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
956       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
957       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
958       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
961       ierr = ISDestroy(&list1);CHKERRQ(ierr);
962       ierr = ISDestroy(&list2);CHKERRQ(ierr);
963 #else
964       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
965 #endif
966       break;
967     case 2:
968 #if defined(PETSC_USE_COMPLEX)
969       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
970 
971       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
972       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
973       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979 #else
980       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
981 
982       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
983       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
984 
985       if (dim[1]%2==0) NM = dim[1]+2;
986       else             NM = dim[1]+1;
987 
988       for (i=0; i<local_n0; i++) {
989         for (j=0; j<dim[1]; j++) {
990           tempindx = i*dim[1] + j;
991           tempindx1 = i*NM + j;
992 
993           indx1[tempindx]=local_0_start*dim[1]+tempindx;
994           indx2[tempindx]=low+tempindx1;
995         }
996       }
997 
998       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
999       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1000 
1001       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1002       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1005       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1006       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1007       ierr = PetscFree(indx1);CHKERRQ(ierr);
1008       ierr = PetscFree(indx2);CHKERRQ(ierr);
1009 #endif
1010       break;
1011     case 3:
1012 #if defined(PETSC_USE_COMPLEX)
1013       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1014 
1015       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1016       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1017       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1018       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1021       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1022       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1023 #else
1024       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1025 
1026       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1027       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1028 
1029       if (dim[2]%2==0) NM = dim[2]+2;
1030       else             NM = dim[2]+1;
1031 
1032       for (i=0; i<local_n0; i++) {
1033         for (j=0; j<dim[1]; j++) {
1034           for (k=0; k<dim[2]; k++) {
1035             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1036             tempindx1 = i*dim[1]*NM + j*NM + k;
1037 
1038             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1039             indx2[tempindx]=low+tempindx1;
1040           }
1041         }
1042       }
1043 
1044       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1045       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1046 
1047       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1048       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1051       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1052       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1053       ierr = PetscFree(indx1);CHKERRQ(ierr);
1054       ierr = PetscFree(indx2);CHKERRQ(ierr);
1055 #endif
1056       break;
1057     default:
1058 #if defined(PETSC_USE_COMPLEX)
1059       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1060 
1061       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1062       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1063       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1064       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1065       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1067       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1068       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1069 #else
1070       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1071 
1072       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1073 
1074       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1075 
1076       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1077 
1078       partial_dim = fftw->partial_dim;
1079 
1080       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1081       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1082 
1083       if (dim[ndim-1]%2==0) NM = 2;
1084       else                  NM = 1;
1085 
1086       j = low;
1087       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1088         indx1[i] = local_0_start*partial_dim + i;
1089         indx2[i] = j;
1090         if (k%dim[ndim-1]==0) j+=NM;
1091         j++;
1092       }
1093       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1094       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1095 
1096       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1097       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1098       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1100       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1101       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1102       ierr = PetscFree(indx1);CHKERRQ(ierr);
1103       ierr = PetscFree(indx2);CHKERRQ(ierr);
1104 #endif
1105       break;
1106     }
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatCreate_FFTW"
1113 /*
1114     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1115 
1116   Options Database Keys:
1117 + -mat_fftw_plannerflags - set FFTW planner flags
1118 
1119    Level: intermediate
1120 
1121 */
1122 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1123 {
1124   PetscErrorCode ierr;
1125   MPI_Comm       comm;
1126   Mat_FFT        *fft=(Mat_FFT*)A->data;
1127   Mat_FFTW       *fftw;
1128   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1129   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1130   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1131   PetscBool      flg;
1132   PetscInt       p_flag,partial_dim=1,ctr;
1133   PetscMPIInt    size,rank;
1134   ptrdiff_t      *pdim;
1135   ptrdiff_t      local_n1,local_1_start;
1136 #if !defined(PETSC_USE_COMPLEX)
1137   ptrdiff_t      temp;
1138   PetscInt       N1,tot_dim;
1139 #else
1140   PetscInt       n1;
1141 #endif
1142 
1143   PetscFunctionBegin;
1144   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1145   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1146   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1147 
1148   fftw_mpi_init();
1149   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1150   pdim[0] = dim[0];
1151 #if !defined(PETSC_USE_COMPLEX)
1152   tot_dim = 2*dim[0];
1153 #endif
1154   for (ctr=1; ctr<ndim; ctr++) {
1155     partial_dim *= dim[ctr];
1156     pdim[ctr]    = dim[ctr];
1157 #if !defined(PETSC_USE_COMPLEX)
1158     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1159     else             tot_dim *= dim[ctr];
1160 #endif
1161   }
1162 
1163   if (size == 1) {
1164 #if defined(PETSC_USE_COMPLEX)
1165     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1166     n    = N;
1167 #else
1168     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1169     n    = tot_dim;
1170 #endif
1171 
1172   } else {
1173     ptrdiff_t local_n0,local_0_start;
1174     switch (ndim) {
1175     case 1:
1176 #if !defined(PETSC_USE_COMPLEX)
1177       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1178 #else
1179       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1180 
1181       n    = (PetscInt)local_n0;
1182       n1   = (PetscInt)local_n1;
1183       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1184 #endif
1185       break;
1186     case 2:
1187 #if defined(PETSC_USE_COMPLEX)
1188       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1189       /*
1190        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1191        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1192        */
1193       n    = (PetscInt)local_n0*dim[1];
1194       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1195 #else
1196       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1197 
1198       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1199       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1200 #endif
1201       break;
1202     case 3:
1203 #if defined(PETSC_USE_COMPLEX)
1204       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1205 
1206       n    = (PetscInt)local_n0*dim[1]*dim[2];
1207       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1208 #else
1209       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1210 
1211       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1212       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1213 #endif
1214       break;
1215     default:
1216 #if defined(PETSC_USE_COMPLEX)
1217       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1218 
1219       n    = (PetscInt)local_n0*partial_dim;
1220       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1221 #else
1222       temp = pdim[ndim-1];
1223 
1224       pdim[ndim-1] = temp/2 + 1;
1225 
1226       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1227 
1228       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1229       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1230 
1231       pdim[ndim-1] = temp;
1232 
1233       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1234 #endif
1235       break;
1236     }
1237   }
1238   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1239   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1240   fft->data = (void*)fftw;
1241 
1242   fft->n            = n;
1243   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1244   fftw->partial_dim = partial_dim;
1245 
1246   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1247   if (size == 1) {
1248 #if defined(PETSC_USE_64BIT_INDICES)
1249     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1250 #else
1251     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1252 #endif
1253   }
1254 
1255   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1256 
1257   fftw->p_forward  = 0;
1258   fftw->p_backward = 0;
1259   fftw->p_flag     = FFTW_ESTIMATE;
1260 
1261   if (size == 1) {
1262     A->ops->mult          = MatMult_SeqFFTW;
1263     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1264   } else {
1265     A->ops->mult          = MatMult_MPIFFTW;
1266     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1267   }
1268   fft->matdestroy = MatDestroy_FFTW;
1269   A->assembled    = PETSC_TRUE;
1270   A->preallocated = PETSC_TRUE;
1271 
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1275 
1276   /* get runtime options */
1277   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1278   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1279   if (flg) {
1280     fftw->p_flag = iplans[p_flag];
1281   }
1282   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 
1287 
1288 
1289