xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 1acd80e4de76bcd495fa62e8872e763f223ce968)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private FFTW context hung off Mat_FFT->data; holds lazily created plans and
   the array pointers those plans were built with */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;  /* dimensions in FFTW's native ptrdiff_t form (used by the MPI interface) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* guru-interface dimension descriptors (64-bit index build) */
#else
  fftw_iodim   *iodims;              /* guru-interface dimension descriptors */
#endif
  PetscInt     partial_dim;          /* product of all but the first dimension -- TODO confirm against the setup routine (not visible here) */
  fftw_plan    p_forward,p_backward; /* forward/backward plans, created on first MatMult/MatMultTranspose */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
/* MatMult_SeqFFTW performs serial forward DFT
34    Input parameter:
35      A - the matrix
36      x - the vector on which FDFT will be performed
37 
38    Output parameter:
39      y - vector that stores result of FDFT
40 */
41 #undef __FUNCT__
42 #define __FUNCT__ "MatMult_SeqFFTW"
43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44 {
45   PetscErrorCode ierr;
46   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48   PetscScalar    *x_array,*y_array;
49 #if defined(PETSC_USE_COMPLEX)
50 #if defined(PETSC_USE_64BIT_INDICES)
51   fftw_iodim64   *iodims;
52 #else
53   fftw_iodim     *iodims;
54 #endif
55   PetscInt       i;
56 #endif
57   PetscInt       ndim = fft->ndim,*dim = fft->dim;
58 
59   PetscFunctionBegin;
60   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
61   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
62   if (!fftw->p_forward) { /* create a plan, then excute it */
63     switch (ndim) {
64     case 1:
65 #if defined(PETSC_USE_COMPLEX)
66       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67 #else
68       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69 #endif
70       break;
71     case 2:
72 #if defined(PETSC_USE_COMPLEX)
73       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74 #else
75       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
76 #endif
77       break;
78     case 3:
79 #if defined(PETSC_USE_COMPLEX)
80       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
81 #else
82       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83 #endif
84       break;
85     default:
86 #if defined(PETSC_USE_COMPLEX)
87       iodims = fftw->iodims;
88 #if defined(PETSC_USE_64BIT_INDICES)
89       if (ndim) {
90         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
91         iodims[ndim-1].is = iodims[ndim-1].os = 1;
92         for (i=ndim-2; i>=0; --i) {
93           iodims[i].n = (ptrdiff_t)dim[i];
94           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
95         }
96       }
97       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98 #else
99       if (ndim) {
100         iodims[ndim-1].n = (int)dim[ndim-1];
101         iodims[ndim-1].is = iodims[ndim-1].os = 1;
102         for (i=ndim-2; i>=0; --i) {
103           iodims[i].n = (int)dim[i];
104           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105         }
106       }
107       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108 #endif
109 
110 #else
111       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
112 #endif
113       break;
114     }
115     fftw->finarray  = x_array;
116     fftw->foutarray = y_array;
117     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
118                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
119     fftw_execute(fftw->p_forward);
120   } else { /* use existing plan */
121     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
123     } else {
124       fftw_execute(fftw->p_forward);
125     }
126   }
127   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
128   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 /* MatMultTranspose_SeqFFTW performs serial backward DFT
133    Input parameter:
134      A - the matrix
135      x - the vector on which BDFT will be performed
136 
137    Output parameter:
138      y - vector that stores result of BDFT
139 */
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
143 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
144 {
145   PetscErrorCode ierr;
146   Mat_FFT        *fft  = (Mat_FFT*)A->data;
147   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
148   PetscScalar    *x_array,*y_array;
149   PetscInt       ndim=fft->ndim,*dim=fft->dim;
150 #if defined(PETSC_USE_COMPLEX)
151 #if defined(PETSC_USE_64BIT_INDICES)
152   fftw_iodim64   *iodims=fftw->iodims;
153 #else
154   fftw_iodim     *iodims=fftw->iodims;
155 #endif
156 #endif
157 
158   PetscFunctionBegin;
159   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
160   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
161   if (!fftw->p_backward) { /* create a plan, then excute it */
162     switch (ndim) {
163     case 1:
164 #if defined(PETSC_USE_COMPLEX)
165       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
166 #else
167       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
168 #endif
169       break;
170     case 2:
171 #if defined(PETSC_USE_COMPLEX)
172       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
173 #else
174       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
175 #endif
176       break;
177     case 3:
178 #if defined(PETSC_USE_COMPLEX)
179       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
180 #else
181       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
182 #endif
183       break;
184     default:
185 #if defined(PETSC_USE_COMPLEX)
186 #if defined(PETSC_USE_64BIT_INDICES)
187       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
188 #else
189       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190 #endif
191 #else
192       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
193 #endif
194       break;
195     }
196     fftw->binarray  = x_array;
197     fftw->boutarray = y_array;
198     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199   } else { /* use existing plan */
200     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 /* MatMult_MPIFFTW performs forward DFT in parallel
212    Input parameter:
213      A - the matrix
214      x - the vector on which FDFT will be performed
215 
216    Output parameter:
217    y   - vector that stores result of FDFT
218 */
219 #undef __FUNCT__
220 #define __FUNCT__ "MatMult_MPIFFTW"
221 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
222 {
223   PetscErrorCode ierr;
224   Mat_FFT        *fft  = (Mat_FFT*)A->data;
225   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
226   PetscScalar    *x_array,*y_array;
227   PetscInt       ndim=fft->ndim,*dim=fft->dim;
228   MPI_Comm       comm;
229 
230   PetscFunctionBegin;
231   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
232   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
233   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
234   if (!fftw->p_forward) { /* create a plan, then excute it */
235     switch (ndim) {
236     case 1:
237 #if defined(PETSC_USE_COMPLEX)
238       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
239 #else
240       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
241 #endif
242       break;
243     case 2:
244 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
245       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
246 #else
247       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
248 #endif
249       break;
250     case 3:
251 #if defined(PETSC_USE_COMPLEX)
252       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
253 #else
254       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
255 #endif
256       break;
257     default:
258 #if defined(PETSC_USE_COMPLEX)
259       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
260 #else
261       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
262 #endif
263       break;
264     }
265     fftw->finarray  = x_array;
266     fftw->foutarray = y_array;
267     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
268                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
269     fftw_execute(fftw->p_forward);
270   } else { /* use existing plan */
271     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
272       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
273     } else {
274       fftw_execute(fftw->p_forward);
275     }
276   }
277   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
278   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
283    Input parameter:
284      A - the matrix
285      x - the vector on which BDFT will be performed
286 
287    Output parameter:
288      y - vector that stores result of BDFT
289 */
290 #undef __FUNCT__
291 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
292 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
293 {
294   PetscErrorCode ierr;
295   Mat_FFT        *fft  = (Mat_FFT*)A->data;
296   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297   PetscScalar    *x_array,*y_array;
298   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299   MPI_Comm       comm;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
303   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
304   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
305   if (!fftw->p_backward) { /* create a plan, then excute it */
306     switch (ndim) {
307     case 1:
308 #if defined(PETSC_USE_COMPLEX)
309       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
310 #else
311       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
312 #endif
313       break;
314     case 2:
315 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
316       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
317 #else
318       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
319 #endif
320       break;
321     case 3:
322 #if defined(PETSC_USE_COMPLEX)
323       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
324 #else
325       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
326 #endif
327       break;
328     default:
329 #if defined(PETSC_USE_COMPLEX)
330       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
331 #else
332       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
333 #endif
334       break;
335     }
336     fftw->binarray  = x_array;
337     fftw->boutarray = y_array;
338     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339   } else { /* use existing plan */
340     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
341       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
342     } else {
343       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
344     }
345   }
346   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
347   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatDestroy_FFTW"
353 PetscErrorCode MatDestroy_FFTW(Mat A)
354 {
355   Mat_FFT        *fft  = (Mat_FFT*)A->data;
356   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   fftw_destroy_plan(fftw->p_forward);
361   fftw_destroy_plan(fftw->p_backward);
362   if (fftw->iodims) {
363     free(fftw->iodims);
364   }
365   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
366   ierr = PetscFree(fft->data);CHKERRQ(ierr);
367   fftw_mpi_cleanup();
368   PetscFunctionReturn(0);
369 }
370 
371 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
372 #undef __FUNCT__
373 #define __FUNCT__ "VecDestroy_MPIFFTW"
374 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375 {
376   PetscErrorCode ierr;
377   PetscScalar    *array;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
387 #undef __FUNCT__
388 #define __FUNCT__ "MatGetVecsFFTW"
389 /*@
390    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
391      parallel layout determined by FFTW
392 
393    Collective on Mat
394 
395    Input Parameter:
396 .   A - the matrix
397 
   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW
402 
403   Level: advanced
404 
405   Note: The parallel layout of output of forward FFTW is always same as the input
406         of the backward FFTW. But parallel layout of the input vector of forward
407         FFTW might not be same as the output of backward FFTW.
408         Also note that we need to provide enough space while doing parallel real transform.
409         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
410         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
411         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
412         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
413         zeros if it is odd only one zero is needed.
414         Lastly one needs some scratch space at the end of data set in each process. alloc_local
415         figures out how much space is needed, i.e. it figures out the data+scratch space for
416         each processor and returns that.
417 
418 .seealso: MatCreateFFTW()
419 @*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed with A under
     "MatGetVecsFFTW_C"; PetscTryMethod is a no-op when the method is absent */
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* MatGetVecsFFTW_FFTW - creates work vectors whose layouts match FFTW's own
   data distribution; each requested vector is optional (pass NULL to skip).
   In the parallel cases the storage comes from fftw_malloc() (which includes
   FFTW's required scratch/padding space) and the vectors get a custom
   destructor, VecDestroy_MPIFFTW, to release it. */
PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;                         /* global transform size */
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n; /* n: local size chosen at setup */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
    /* complex builds: every vector has the full size N; real builds use fft->n,
       which accounts for the r2c padding of the last dimension */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;  /* local/global sizes counted in real scalars (incl. r2c padding) */
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D: input and output may have different distributions (local_n0 vs local_n1) */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      /* the backward transform may distribute differently: query again */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real 2D: pad last dimension to 2*(dim[1]/2+1) real values */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same padding scheme as 2D, applied to the last dimension */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* general-d real: temporarily replace the last dimension by dim/2+1 to
         query the padded local size, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
644 
645 #undef __FUNCT__
646 #define __FUNCT__ "VecScatterPetscToFFTW"
647 /*@
648    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
649 
650    Collective on Mat
651 
652    Input Parameters:
653 +  A - FFTW matrix
654 -  x - the PETSc vector
655 
656    Output Parameters:
657 .  y - the FFTW vector
658 
659   Options Database Keys:
660 . -mat_fftw_plannerflags - set FFTW planner flags
661 
662    Level: intermediate
663 
664    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
665          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
666          zeros. This routine does that job by scattering operation.
667 
668 .seealso: VecScatterFFTWToPetsc()
669 @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed with A under "VecScatterPetscToFFTW_C";
     PetscTryMethod is a no-op when the method is absent */
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
681 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
682 {
683   PetscErrorCode ierr;
684   MPI_Comm       comm;
685   Mat_FFT        *fft  = (Mat_FFT*)A->data;
686   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
687   PetscInt       N     =fft->N;
688   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
689   PetscInt       low;
690   PetscMPIInt    rank,size;
691   PetscInt       vsize,vsize1;
692   ptrdiff_t      local_n0,local_0_start;
693   ptrdiff_t      local_n1,local_1_start;
694   VecScatter     vecscat;
695   IS             list1,list2;
696 #if !defined(PETSC_USE_COMPLEX)
697   PetscInt       i,j,k,partial_dim;
698   PetscInt       *indx1, *indx2, tempindx, tempindx1;
699   PetscInt       N1,n1,NM;
700   ptrdiff_t      temp;
701 #endif
702 
703   PetscFunctionBegin;
704   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
705   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
706   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
707   ierr = VecGetOwnershipRange(y,&low,NULL);
708 
709   if (size==1) {
710     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
711     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
712     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
713     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
714     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
717     ierr = ISDestroy(&list1);CHKERRQ(ierr);
718   } else {
719     switch (ndim) {
720     case 1:
721 #if defined(PETSC_USE_COMPLEX)
722       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
723 
724       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
725       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
726       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
727       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
730       ierr = ISDestroy(&list1);CHKERRQ(ierr);
731       ierr = ISDestroy(&list2);CHKERRQ(ierr);
732 #else
733       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
734 #endif
735       break;
736     case 2:
737 #if defined(PETSC_USE_COMPLEX)
738       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
739 
740       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
741       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
742       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
743       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
745       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
746       ierr = ISDestroy(&list1);CHKERRQ(ierr);
747       ierr = ISDestroy(&list2);CHKERRQ(ierr);
748 #else
749       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
750 
751       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
752       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
753       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
754 
755       if (dim[1]%2==0) {
756         NM = dim[1]+2;
757       } else {
758         NM = dim[1]+1;
759       }
760       for (i=0; i<local_n0; i++) {
761         for (j=0; j<dim[1]; j++) {
762           tempindx  = i*dim[1] + j;
763           tempindx1 = i*NM + j;
764 
765           indx1[tempindx]=local_0_start*dim[1]+tempindx;
766           indx2[tempindx]=low+tempindx1;
767         }
768       }
769 
770       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
771       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
772 
773       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
774       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
776       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
777       ierr = ISDestroy(&list1);CHKERRQ(ierr);
778       ierr = ISDestroy(&list2);CHKERRQ(ierr);
779       ierr = PetscFree(indx1);CHKERRQ(ierr);
780       ierr = PetscFree(indx2);CHKERRQ(ierr);
781 #endif
782       break;
783 
784     case 3:
785 #if defined(PETSC_USE_COMPLEX)
786       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
787 
788       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
789       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
790       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
791       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
793       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
794       ierr = ISDestroy(&list1);CHKERRQ(ierr);
795       ierr = ISDestroy(&list2);CHKERRQ(ierr);
796 #else
797       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
798 
799       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
800 
801       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
802       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
803 
804       if (dim[2]%2==0) NM = dim[2]+2;
805       else             NM = dim[2]+1;
806 
807       for (i=0; i<local_n0; i++) {
808         for (j=0; j<dim[1]; j++) {
809           for (k=0; k<dim[2]; k++) {
810             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
811             tempindx1 = i*dim[1]*NM + j*NM + k;
812 
813             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
814             indx2[tempindx]=low+tempindx1;
815           }
816         }
817       }
818 
819       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
820       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
821       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
822       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
825       ierr = ISDestroy(&list1);CHKERRQ(ierr);
826       ierr = ISDestroy(&list2);CHKERRQ(ierr);
827       ierr = PetscFree(indx1);CHKERRQ(ierr);
828       ierr = PetscFree(indx2);CHKERRQ(ierr);
829 #endif
830       break;
831 
832     default:
833 #if defined(PETSC_USE_COMPLEX)
834       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
835 
836       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
837       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
838       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
839       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
842       ierr = ISDestroy(&list1);CHKERRQ(ierr);
843       ierr = ISDestroy(&list2);CHKERRQ(ierr);
844 #else
845       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
846 
847       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
848 
849       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
850 
851       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
852 
853       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
854 
855       partial_dim = fftw->partial_dim;
856 
857       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
858       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
859 
860       if (dim[ndim-1]%2==0) NM = 2;
861       else                  NM = 1;
862 
863       j = low;
864       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
865         indx1[i] = local_0_start*partial_dim + i;
866         indx2[i] = j;
867         if (k%dim[ndim-1]==0) j+=NM;
868         j++;
869       }
870       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
871       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
872       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
873       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
876       ierr = ISDestroy(&list1);CHKERRQ(ierr);
877       ierr = ISDestroy(&list2);CHKERRQ(ierr);
878       ierr = PetscFree(indx1);CHKERRQ(ierr);
879       ierr = PetscFree(indx2);CHKERRQ(ierr);
880 #endif
881       break;
882     }
883   }
884   PetscFunctionReturn(0);
885 }
886 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on A (VecScatterFFTWToPetsc_FFTW
     for MATFFTW, see MatCreate_FFTW); a no-op if the method is not composed. */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
919 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
920 {
921   PetscErrorCode ierr;
922   MPI_Comm       comm;
923   Mat_FFT        *fft  = (Mat_FFT*)A->data;
924   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
925   PetscInt       N     = fft->N;
926   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
927   PetscInt       low;
928   PetscMPIInt    rank,size;
929   ptrdiff_t      local_n0,local_0_start;
930   ptrdiff_t      local_n1,local_1_start;
931   VecScatter     vecscat;
932   IS             list1,list2;
933 #if !defined(PETSC_USE_COMPLEX)
934   PetscInt       i,j,k,partial_dim;
935   PetscInt       *indx1, *indx2, tempindx, tempindx1;
936   PetscInt       N1, n1,NM;
937   ptrdiff_t      temp;
938 #endif
939 
940   PetscFunctionBegin;
941   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
942   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
943   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
944   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
945 
946   if (size==1) {
947     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
948     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
949     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
952     ierr = ISDestroy(&list1);CHKERRQ(ierr);
953 
954   } else {
955     switch (ndim) {
956     case 1:
957 #if defined(PETSC_USE_COMPLEX)
958       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
959 
960       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
961       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
962       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
963       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
966       ierr = ISDestroy(&list1);CHKERRQ(ierr);
967       ierr = ISDestroy(&list2);CHKERRQ(ierr);
968 #else
969       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
970 #endif
971       break;
972     case 2:
973 #if defined(PETSC_USE_COMPLEX)
974       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
975 
976       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
977       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
978       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
979       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
982       ierr = ISDestroy(&list1);CHKERRQ(ierr);
983       ierr = ISDestroy(&list2);CHKERRQ(ierr);
984 #else
985       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
986 
987       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
988 
989       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
990       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
991 
992       if (dim[1]%2==0) NM = dim[1]+2;
993       else             NM = dim[1]+1;
994 
995       for (i=0; i<local_n0; i++) {
996         for (j=0; j<dim[1]; j++) {
997           tempindx = i*dim[1] + j;
998           tempindx1 = i*NM + j;
999 
1000           indx1[tempindx]=local_0_start*dim[1]+tempindx;
1001           indx2[tempindx]=low+tempindx1;
1002         }
1003       }
1004 
1005       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1006       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1007 
1008       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1009       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1012       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1013       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1014       ierr = PetscFree(indx1);CHKERRQ(ierr);
1015       ierr = PetscFree(indx2);CHKERRQ(ierr);
1016 #endif
1017       break;
1018     case 3:
1019 #if defined(PETSC_USE_COMPLEX)
1020       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1021 
1022       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1023       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1024       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1025       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030 #else
1031 
1032       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1033 
1034       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1035 
1036       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1037       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1038 
1039       if (dim[2]%2==0) NM = dim[2]+2;
1040       else             NM = dim[2]+1;
1041 
1042       for (i=0; i<local_n0; i++) {
1043         for (j=0; j<dim[1]; j++) {
1044           for (k=0; k<dim[2]; k++) {
1045             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1046             tempindx1 = i*dim[1]*NM + j*NM + k;
1047 
1048             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1049             indx2[tempindx]=low+tempindx1;
1050           }
1051         }
1052       }
1053 
1054       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1055       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1056 
1057       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1058       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1060       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1061       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1062       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1063       ierr = PetscFree(indx1);CHKERRQ(ierr);
1064       ierr = PetscFree(indx2);CHKERRQ(ierr);
1065 #endif
1066       break;
1067     default:
1068 #if defined(PETSC_USE_COMPLEX)
1069       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1070 
1071       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1072       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1073       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1074       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1077       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1078       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1079 #else
1080       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1081 
1082       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1083 
1084       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1085 
1086       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1087 
1088       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1089 
1090       partial_dim = fftw->partial_dim;
1091 
1092       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1093       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1094 
1095       if (dim[ndim-1]%2==0) NM = 2;
1096       else                  NM = 1;
1097 
1098       j = low;
1099       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1100         indx1[i] = local_0_start*partial_dim + i;
1101         indx2[i] = j;
1102         if (k%dim[ndim-1]==0) j+=NM;
1103         j++;
1104       }
1105       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1106       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1107 
1108       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1109       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1112       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1113       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1114       ierr = PetscFree(indx1);CHKERRQ(ierr);
1115       ierr = PetscFree(indx2);CHKERRQ(ierr);
1116 #endif
1117       break;
1118     }
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatCreate_FFTW"
1125 /*
1126     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1127 
1128   Options Database Keys:
1129 + -mat_fftw_plannerflags - set FFTW planner flags
1130 
1131    Level: intermediate
1132 
1133 */
1134 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1135 {
1136   PetscErrorCode ierr;
1137   MPI_Comm       comm;
1138   Mat_FFT        *fft=(Mat_FFT*)A->data;
1139   Mat_FFTW       *fftw;
1140   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1141   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1142   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1143   PetscBool      flg;
1144   PetscInt       p_flag,partial_dim=1,ctr;
1145   PetscMPIInt    size,rank;
1146   ptrdiff_t      *pdim;
1147   ptrdiff_t      local_n1,local_1_start;
1148 #if !defined(PETSC_USE_COMPLEX)
1149   ptrdiff_t      temp;
1150   PetscInt       N1,tot_dim;
1151 #else
1152   PetscInt       n1;
1153 #endif
1154 
1155   PetscFunctionBegin;
1156   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1157   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1158   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1159 
1160   fftw_mpi_init();
1161   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1162   pdim[0] = dim[0];
1163 #if !defined(PETSC_USE_COMPLEX)
1164   tot_dim = 2*dim[0];
1165 #endif
1166   for (ctr=1; ctr<ndim; ctr++) {
1167     partial_dim *= dim[ctr];
1168     pdim[ctr]    = dim[ctr];
1169 #if !defined(PETSC_USE_COMPLEX)
1170     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1171     else             tot_dim *= dim[ctr];
1172 #endif
1173   }
1174 
1175   if (size == 1) {
1176 #if defined(PETSC_USE_COMPLEX)
1177     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1178     n    = N;
1179 #else
1180     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1181     n    = tot_dim;
1182 #endif
1183 
1184   } else {
1185     ptrdiff_t local_n0,local_0_start;
1186     switch (ndim) {
1187     case 1:
1188 #if !defined(PETSC_USE_COMPLEX)
1189       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1190 #else
1191       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1192 
1193       n    = (PetscInt)local_n0;
1194       n1   = (PetscInt)local_n1;
1195       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1196 #endif
1197       break;
1198     case 2:
1199 #if defined(PETSC_USE_COMPLEX)
1200       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1201       /*
1202        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1203        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1204        */
1205       n    = (PetscInt)local_n0*dim[1];
1206       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1207 #else
1208       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1209 
1210       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1211       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1212 #endif
1213       break;
1214     case 3:
1215 #if defined(PETSC_USE_COMPLEX)
1216       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1217 
1218       n    = (PetscInt)local_n0*dim[1]*dim[2];
1219       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1220 #else
1221       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1222 
1223       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1224       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1225 #endif
1226       break;
1227     default:
1228 #if defined(PETSC_USE_COMPLEX)
1229       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1230 
1231       n    = (PetscInt)local_n0*partial_dim;
1232       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1233 #else
1234       temp = pdim[ndim-1];
1235 
1236       pdim[ndim-1] = temp/2 + 1;
1237 
1238       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1239 
1240       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1241       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1242 
1243       pdim[ndim-1] = temp;
1244 
1245       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1246 #endif
1247       break;
1248     }
1249   }
1250   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1251   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1252   fft->data = (void*)fftw;
1253 
1254   fft->n            = n;
1255   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1256   fftw->partial_dim = partial_dim;
1257 
1258   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1259   if (size == 1) {
1260 #if defined(PETSC_USE_64BIT_INDICES)
1261     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1262 #else
1263     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1264 #endif
1265   }
1266 
1267   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1268 
1269   fftw->p_forward  = 0;
1270   fftw->p_backward = 0;
1271   fftw->p_flag     = FFTW_ESTIMATE;
1272 
1273   if (size == 1) {
1274     A->ops->mult          = MatMult_SeqFFTW;
1275     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1276   } else {
1277     A->ops->mult          = MatMult_MPIFFTW;
1278     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1279   }
1280   fft->matdestroy = MatDestroy_FFTW;
1281   A->assembled    = PETSC_TRUE;
1282   A->preallocated = PETSC_TRUE;
1283 
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1287 
1288   /* get runtime options */
1289   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1290   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1291   if (flg) {
1292     fftw->p_flag = iplans[p_flag];
1293   }
1294   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 
1299 
1300 
1301