xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 8c1d5ab369abe91f47a3d8806b1b424684d40086)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_iodim  *iodims;
15   PetscInt    partial_dim;
16   fftw_plan   p_forward,p_backward;
17   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
18   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
19                                                             executed for the arrays with which the plan was created */
20 } Mat_FFTW;
21 
22 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
26 extern PetscErrorCode MatDestroy_FFTW(Mat);
27 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
28 
29 /* MatMult_SeqFFTW performs forward DFT in parallel
30    Input parameter:
31      A - the matrix
32      x - the vector on which FDFT will be performed
33 
34    Output parameter:
35      y - vector that stores result of FDFT
36 */
37 #undef __FUNCT__
38 #define __FUNCT__ "MatMult_SeqFFTW"
39 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
40 {
41   PetscErrorCode ierr;
42   Mat_FFT        *fft  = (Mat_FFT*)A->data;
43   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
44   PetscScalar    *x_array,*y_array;
45   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
46   //int            *dim_32,i;
47   fftw_iodim     *iodims;
48 
49   PetscFunctionBegin;
50   //ierr = PetscMalloc(ndim*sizeof(int),&dim_32);CHKERRQ(ierr);
51   //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
52 
53   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
54   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
55   if (!fftw->p_forward) { /* create a plan, then excute it */
56     switch (ndim) {
57     case 1:
58 #if defined(PETSC_USE_COMPLEX)
59       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
60 #else
61       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
62 #endif
63       break;
64     case 2:
65 #if defined(PETSC_USE_COMPLEX)
66       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67 #else
68       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69 #endif
70       break;
71     case 3:
72 #if defined(PETSC_USE_COMPLEX)
73       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74       //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE);
75 #else
76       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
77 #endif
78       break;
79     default:
80 #if defined(PETSC_USE_COMPLEX)
81       //fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
82       /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */
83 
84       iodims = (fftw_iodim*)fftw->iodims;
85       //ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&iodims);CHKERRQ(ierr);
86       if (ndim) {
87         iodims[ndim-1].n = dim[ndim-1];
88         iodims[ndim-1].is = iodims[ndim-1].os = 1;
89         for (i=ndim-2; i>=0; --i) {
90           iodims[i].n = dim[i];
91           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
92         }
93       }
94 
95       fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
96 
97 #else
98       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
99 #endif
100       break;
101     }
102     fftw->finarray  = x_array;
103     fftw->foutarray = y_array;
104     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
105                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
106     fftw_execute(fftw->p_forward);
107   } else { /* use existing plan */
108     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
109       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
110     } else {
111       fftw_execute(fftw->p_forward);
112     }
113   }
114   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
115   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
116   //ierr = PetscFree(dim_32);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 /* MatMultTranspose_SeqFFTW performs serial backward DFT
121    Input parameter:
122      A - the matrix
123      x - the vector on which BDFT will be performed
124 
125    Output parameter:
126      y - vector that stores result of BDFT
127 */
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
131 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
132 {
133   PetscErrorCode ierr;
134   Mat_FFT        *fft  = (Mat_FFT*)A->data;
135   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
136   PetscScalar    *x_array,*y_array;
137   PetscInt       ndim=fft->ndim,*dim=fft->dim;
138   fftw_iodim     *iodims=(fftw_iodim*)fftw->iodims;
139   //int            *dim_32,i; /* for 64-bit */
140 
141   PetscFunctionBegin;
142   //ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
143   //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
144 
145   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
146   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
147   if (!fftw->p_backward) { /* create a plan, then excute it */
148     switch (ndim) {
149     case 1:
150 #if defined(PETSC_USE_COMPLEX)
151       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
152 #else
153       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
154 #endif
155       break;
156     case 2:
157 #if defined(PETSC_USE_COMPLEX)
158       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
159 #else
160       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
161 #endif
162       break;
163     case 3:
164 #if defined(PETSC_USE_COMPLEX)
165       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
166       //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE);
167 #else
168       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
169 #endif
170       break;
171     default:
172 #if defined(PETSC_USE_COMPLEX)
173       //fftw->p_backward = fftw_plan_dft((int)ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
174       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175 #else
176       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177 #endif
178       break;
179     }
180     fftw->binarray  = x_array;
181     fftw->boutarray = y_array;
182     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
183   } else { /* use existing plan */
184     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
185       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
186     } else {
187       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
188     }
189   }
190   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
191   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
192   //ierr = PetscFree(dim_32);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 /* MatMult_MPIFFTW performs forward DFT in parallel
197    Input parameter:
198      A - the matrix
199      x - the vector on which FDFT will be performed
200 
201    Output parameter:
202    y   - vector that stores result of FDFT
203 */
204 #undef __FUNCT__
205 #define __FUNCT__ "MatMult_MPIFFTW"
206 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
207 {
208   PetscErrorCode ierr;
209   Mat_FFT        *fft  = (Mat_FFT*)A->data;
210   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
211   PetscScalar    *x_array,*y_array;
212   PetscInt       ndim=fft->ndim,*dim=fft->dim;
213   MPI_Comm       comm;
214 
215   PetscFunctionBegin;
216   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
217   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
218   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
219   if (!fftw->p_forward) { /* create a plan, then excute it */
220     switch (ndim) {
221     case 1:
222 #if defined(PETSC_USE_COMPLEX)
223       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
224 #else
225       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
226 #endif
227       break;
228     case 2:
229 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
230       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
231 #else
232       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
233 #endif
234       break;
235     case 3:
236 #if defined(PETSC_USE_COMPLEX)
237       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
238 #else
239       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
240 #endif
241       break;
242     default:
243 #if defined(PETSC_USE_COMPLEX)
244       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
245 #else
246       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
247 #endif
248       break;
249     }
250     fftw->finarray  = x_array;
251     fftw->foutarray = y_array;
252     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
253                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
254     fftw_execute(fftw->p_forward);
255   } else { /* use existing plan */
256     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
257       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
258     } else {
259       fftw_execute(fftw->p_forward);
260     }
261   }
262   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
263   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
268    Input parameter:
269      A - the matrix
270      x - the vector on which BDFT will be performed
271 
272    Output parameter:
273      y - vector that stores result of BDFT
274 */
275 #undef __FUNCT__
276 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
277 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
278 {
279   PetscErrorCode ierr;
280   Mat_FFT        *fft  = (Mat_FFT*)A->data;
281   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
282   PetscScalar    *x_array,*y_array;
283   PetscInt       ndim=fft->ndim,*dim=fft->dim;
284   MPI_Comm       comm;
285 
286   PetscFunctionBegin;
287   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
288   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
289   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
290   if (!fftw->p_backward) { /* create a plan, then excute it */
291     switch (ndim) {
292     case 1:
293 #if defined(PETSC_USE_COMPLEX)
294       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
295 #else
296       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
297 #endif
298       break;
299     case 2:
300 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
301       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
302 #else
303       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
304 #endif
305       break;
306     case 3:
307 #if defined(PETSC_USE_COMPLEX)
308       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
309 #else
310       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
311 #endif
312       break;
313     default:
314 #if defined(PETSC_USE_COMPLEX)
315       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
316       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
317 #else
318       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
319 #endif
320       break;
321     }
322     fftw->binarray  = x_array;
323     fftw->boutarray = y_array;
324     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
325   } else { /* use existing plan */
326     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
327       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
328     } else {
329       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
330     }
331   }
332   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
333   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatDestroy_FFTW"
339 PetscErrorCode MatDestroy_FFTW(Mat A)
340 {
341   Mat_FFT        *fft  = (Mat_FFT*)A->data;
342   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   fftw_destroy_plan(fftw->p_forward);
347   fftw_destroy_plan(fftw->p_backward);
348   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
349   if (fftw->iodims) {
350     /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */
351     free(fftw->iodims);
352   }
353   ierr = PetscFree(fft->data);CHKERRQ(ierr);
354   fftw_mpi_cleanup();
355   PetscFunctionReturn(0);
356 }
357 
358 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
359 #undef __FUNCT__
360 #define __FUNCT__ "VecDestroy_MPIFFTW"
361 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
362 {
363   PetscErrorCode ierr;
364   PetscScalar    *array;
365 
366   PetscFunctionBegin;
367   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
368   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
369   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
370   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
374 #undef __FUNCT__
375 #define __FUNCT__ "MatGetVecsFFTW"
376 /*@
377    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
378      parallel layout determined by FFTW
379 
380    Collective on Mat
381 
382    Input Parameter:
383 .   A - the matrix
384 
385    Output Parameter:
386 +   x - (optional) input vector of forward FFTW
387 -   y - (optional) output vector of forward FFTW
388 -   z - (optional) output vector of backward FFTW
389 
390   Level: advanced
391 
392   Note: The parallel layout of output of forward FFTW is always same as the input
393         of the backward FFTW. But parallel layout of the input vector of forward
394         FFTW might not be same as the output of backward FFTW.
395         Also note that we need to provide enough space while doing parallel real transform.
396         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
397         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
398         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
399         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
400         zeros if it is odd only one zero is needed.
401         Lastly one needs some scratch space at the end of data set in each process. alloc_local
402         figures out how much space is needed, i.e. it figures out the data+scratch space for
403         each processor and returns that.
404 
405 .seealso: MatCreateFFTW()
406 @*/
407 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
408 {
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
418 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
419 {
420   PetscErrorCode ierr;
421   PetscMPIInt    size,rank;
422   MPI_Comm       comm;
423   Mat_FFT        *fft  = (Mat_FFT*)A->data;
424   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
425   PetscInt       N     = fft->N;
426   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
427 
428   PetscFunctionBegin;
429   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
430   PetscValidType(A,1);
431   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
432 
433   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
434   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
435   if (size == 1) { /* sequential case */
436 #if defined(PETSC_USE_COMPLEX)
437     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
438     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
439     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
440 #else
441     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
442     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
443     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
444 #endif
445   } else { /* parallel cases */
446     ptrdiff_t    alloc_local,local_n0,local_0_start;
447     ptrdiff_t    local_n1;
448     fftw_complex *data_fout;
449     ptrdiff_t    local_1_start;
450 #if defined(PETSC_USE_COMPLEX)
451     fftw_complex *data_fin,*data_bout;
452 #else
453     double    *data_finr,*data_boutr;
454     PetscInt  n1,N1;
455     ptrdiff_t temp;
456 #endif
457 
458     switch (ndim) {
459     case 1:
460 #if !defined(PETSC_USE_COMPLEX)
461       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
462 #else
463       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
464       if (fin) {
465         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
466         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
467 
468         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
469       }
470       if (fout) {
471         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
472         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
473 
474         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
475       }
476       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
477       if (bout) {
478         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
480 
481         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
482       }
483       break;
484 #endif
485     case 2:
486 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
487       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
488       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
489       if (fin) {
490         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
491         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
492 
493         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
494       }
495       if (bout) {
496         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
497         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
498 
499         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
500       }
501       if (fout) {
502         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
503         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
504 
505         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
506       }
507 #else
508       /* Get local size */
509       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
510       if (fin) {
511         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
512         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
513 
514         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
515       }
516       if (fout) {
517         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
518         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
519 
520         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
521       }
522       if (bout) {
523         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
524         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
525 
526         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
527       }
528 #endif
529       break;
530     case 3:
531 #if !defined(PETSC_USE_COMPLEX)
532       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
533       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
534       if (fin) {
535         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
536         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
537 
538         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
539       }
540       if (bout) {
541         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
542         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
543 
544         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
545       }
546 
547       if (fout) {
548         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
549         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
550 
551         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
552       }
553 #else
554       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
555       if (fin) {
556         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
557         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
558 
559         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
560       }
561       if (fout) {
562         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
564 
565         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
566       }
567       if (bout) {
568         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
569         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
570 
571         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
572       }
573 #endif
574       break;
575     default:
576 #if !defined(PETSC_USE_COMPLEX)
577       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
578 
579       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
580 
581       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
582       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
583 
584       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
585 
586       if (fin) {
587         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
588         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
589 
590         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
591       }
592       if (bout) {
593         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
594         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
595 
596         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
597       }
598 
599       if (fout) {
600         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
601         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
602 
603         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
604       }
605 #else
606       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
607       if (fin) {
608         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
609         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
610 
611         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
612       }
613       if (fout) {
614         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
615         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
616 
617         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
618       }
619       if (bout) {
620         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
621         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
622 
623         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
624       }
625 #endif
626       break;
627     }
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "VecScatterPetscToFFTW"
634 /*@
635    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
636 
637    Collective on Mat
638 
639    Input Parameters:
640 +  A - FFTW matrix
641 -  x - the PETSc vector
642 
643    Output Parameters:
644 .  y - the FFTW vector
645 
646   Options Database Keys:
647 . -mat_fftw_plannerflags - set FFTW planner flags
648 
649    Level: intermediate
650 
651    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
652          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
653          zeros. This routine does that job by scattering operation.
654 
655 .seealso: VecScatterFFTWToPetsc()
656 @*/
657 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
658 {
659   PetscErrorCode ierr;
660 
661   PetscFunctionBegin;
662   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
668 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
669 {
670   PetscErrorCode ierr;
671   MPI_Comm       comm;
672   Mat_FFT        *fft  = (Mat_FFT*)A->data;
673   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
674   PetscInt       N     =fft->N;
675   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
676   PetscInt       low;
677   PetscMPIInt    rank,size;
678   PetscInt       vsize,vsize1;
679   //ptrdiff_t      alloc_local,local_n0,local_0_start;
680   ptrdiff_t      local_n0,local_0_start;
681   ptrdiff_t      local_n1,local_1_start;
682   VecScatter     vecscat;
683   IS             list1,list2;
684 #if !defined(PETSC_USE_COMPLEX)
685   PetscInt       i,j,k,partial_dim;
686   PetscInt       *indx1, *indx2, tempindx, tempindx1;
687   PetscInt       N1, n1,NM;
688   ptrdiff_t      temp;
689 #endif
690 
691   PetscFunctionBegin;
692   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
693   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
694   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
695   ierr = VecGetOwnershipRange(y,&low,NULL);
696 
697   if (size==1) {
698     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
699     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
700     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
701     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
702     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
703     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
704     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
705     ierr = ISDestroy(&list1);CHKERRQ(ierr);
706   } else {
707     switch (ndim) {
708     case 1:
709 #if defined(PETSC_USE_COMPLEX)
710       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
711 
712       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
713       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
714       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
715       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
717       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
718       ierr = ISDestroy(&list1);CHKERRQ(ierr);
719       ierr = ISDestroy(&list2);CHKERRQ(ierr);
720 #else
721       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
722 #endif
723       break;
724     case 2:
725 #if defined(PETSC_USE_COMPLEX)
726       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
727 
728       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
729       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
730       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
731       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
732       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
733       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
734       ierr = ISDestroy(&list1);CHKERRQ(ierr);
735       ierr = ISDestroy(&list2);CHKERRQ(ierr);
736 #else
737       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
738 
739       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
740       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
741       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
742 
743       if (dim[1]%2==0) {
744         NM = dim[1]+2;
745       } else {
746         NM = dim[1]+1;
747       }
748       for (i=0; i<local_n0; i++) {
749         for (j=0; j<dim[1]; j++) {
750           tempindx  = i*dim[1] + j;
751           tempindx1 = i*NM + j;
752 
753           indx1[tempindx]=local_0_start*dim[1]+tempindx;
754           indx2[tempindx]=low+tempindx1;
755         }
756       }
757 
758       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
759       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
760 
761       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
762       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
765       ierr = ISDestroy(&list1);CHKERRQ(ierr);
766       ierr = ISDestroy(&list2);CHKERRQ(ierr);
767       ierr = PetscFree(indx1);CHKERRQ(ierr);
768       ierr = PetscFree(indx2);CHKERRQ(ierr);
769 #endif
770       break;
771 
772     case 3:
773 #if defined(PETSC_USE_COMPLEX)
774       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
775 
776       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
777       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
778       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
779       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
780       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
781       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
782       ierr = ISDestroy(&list1);CHKERRQ(ierr);
783       ierr = ISDestroy(&list2);CHKERRQ(ierr);
784 #else
785       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
786 
787       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
788 
789       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
790       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
791 
792       if (dim[2]%2==0) NM = dim[2]+2;
793       else             NM = dim[2]+1;
794 
795       for (i=0; i<local_n0; i++) {
796         for (j=0; j<dim[1]; j++) {
797           for (k=0; k<dim[2]; k++) {
798             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
799             tempindx1 = i*dim[1]*NM + j*NM + k;
800 
801             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
802             indx2[tempindx]=low+tempindx1;
803           }
804         }
805       }
806 
807       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
808       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
809       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
810       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
812       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
813       ierr = ISDestroy(&list1);CHKERRQ(ierr);
814       ierr = ISDestroy(&list2);CHKERRQ(ierr);
815       ierr = PetscFree(indx1);CHKERRQ(ierr);
816       ierr = PetscFree(indx2);CHKERRQ(ierr);
817 #endif
818       break;
819 
820     default:
821 #if defined(PETSC_USE_COMPLEX)
822       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
823 
824       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
825       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
826       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
827       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
830       ierr = ISDestroy(&list1);CHKERRQ(ierr);
831       ierr = ISDestroy(&list2);CHKERRQ(ierr);
832 #else
833       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
834 
835       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
836 
837       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
838 
839       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
840 
841       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
842 
843       partial_dim = fftw->partial_dim;
844 
845       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
846       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
847 
848       if (dim[ndim-1]%2==0) NM = 2;
849       else                  NM = 1;
850 
851       j = low;
852       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
853         indx1[i] = local_0_start*partial_dim + i;
854         indx2[i] = j;
855         if (k%dim[ndim-1]==0) j+=NM;
856         j++;
857       }
858       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
859       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
860       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
861       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
864       ierr = ISDestroy(&list1);CHKERRQ(ierr);
865       ierr = ISDestroy(&list2);CHKERRQ(ierr);
866       ierr = PetscFree(indx1);CHKERRQ(ierr);
867       ierr = PetscFree(indx2);CHKERRQ(ierr);
868 #endif
869       break;
870     }
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "VecScatterFFTWToPetsc"
877 /*@
878    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
879 
880    Collective on Mat
881 
882     Input Parameters:
883 +   A - FFTW matrix
884 -   x - FFTW vector
885 
886    Output Parameters:
887 .  y - PETSc vector
888 
889    Level: intermediate
890 
891    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
892          VecScatterFFTWToPetsc removes those extra zeros.
893 
894 .seealso: VecScatterPetscToFFTW()
895 @*/
896 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
897 {
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
907 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
908 {
909   PetscErrorCode ierr;
910   MPI_Comm       comm;
911   Mat_FFT        *fft  = (Mat_FFT*)A->data;
912   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
913   PetscInt       N     = fft->N;
914   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
915   PetscInt       low;
916   PetscMPIInt    rank,size;
917   ptrdiff_t      local_n0,local_0_start;
918   ptrdiff_t      local_n1,local_1_start;
919   VecScatter     vecscat;
920   IS             list1,list2;
921 #if !defined(PETSC_USE_COMPLEX)
922   PetscInt  i,j,k,partial_dim;
923   PetscInt  *indx1, *indx2, tempindx, tempindx1;
924   PetscInt  N1, n1,NM;
925   ptrdiff_t temp;
926 #endif
927 
928   PetscFunctionBegin;
929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
930   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
931   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
932   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
933 
934   if (size==1) {
935     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
936     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
937     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
938     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
940     ierr = ISDestroy(&list1);CHKERRQ(ierr);
941 
942   } else {
943     switch (ndim) {
944     case 1:
945 #if defined(PETSC_USE_COMPLEX)
946       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
947 
948       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
949       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
950       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
951       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
954       ierr = ISDestroy(&list1);CHKERRQ(ierr);
955       ierr = ISDestroy(&list2);CHKERRQ(ierr);
956 #else
957       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
958 #endif
959       break;
960     case 2:
961 #if defined(PETSC_USE_COMPLEX)
962       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
963 
964       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
965       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
966       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
967       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
970       ierr = ISDestroy(&list1);CHKERRQ(ierr);
971       ierr = ISDestroy(&list2);CHKERRQ(ierr);
972 #else
973       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
974 
975       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
976 
977       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
978       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
979 
980       if (dim[1]%2==0) NM = dim[1]+2;
981       else             NM = dim[1]+1;
982 
983       for (i=0; i<local_n0; i++) {
984         for (j=0; j<dim[1]; j++) {
985           tempindx = i*dim[1] + j;
986           tempindx1 = i*NM + j;
987 
988           indx1[tempindx]=local_0_start*dim[1]+tempindx;
989           indx2[tempindx]=low+tempindx1;
990         }
991       }
992 
993       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
994       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
995 
996       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
997       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1000       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1001       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1002       ierr = PetscFree(indx1);CHKERRQ(ierr);
1003       ierr = PetscFree(indx2);CHKERRQ(ierr);
1004 #endif
1005       break;
1006     case 3:
1007 #if defined(PETSC_USE_COMPLEX)
1008       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1009 
1010       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1011       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1012       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1013       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1016       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1017       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1018 #else
1019 
1020       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1021 
1022       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1023 
1024       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1025       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1026 
1027       if (dim[2]%2==0) NM = dim[2]+2;
1028       else             NM = dim[2]+1;
1029 
1030       for (i=0; i<local_n0; i++) {
1031         for (j=0; j<dim[1]; j++) {
1032           for (k=0; k<dim[2]; k++) {
1033             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1034             tempindx1 = i*dim[1]*NM + j*NM + k;
1035 
1036             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1037             indx2[tempindx]=low+tempindx1;
1038           }
1039         }
1040       }
1041 
1042       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1043       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1044 
1045       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1046       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1049       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1050       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1051       ierr = PetscFree(indx1);CHKERRQ(ierr);
1052       ierr = PetscFree(indx2);CHKERRQ(ierr);
1053 #endif
1054       break;
1055     default:
1056 #if defined(PETSC_USE_COMPLEX)
1057       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1058 
1059       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1060       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1061       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1062       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1065       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1066       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1067 #else
1068       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1069 
1070       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1071 
1072       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1073 
1074       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1075 
1076       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1077 
1078       partial_dim = fftw->partial_dim;
1079 
1080       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1081       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1082 
1083       if (dim[ndim-1]%2==0) NM = 2;
1084       else                  NM = 1;
1085 
1086       j = low;
1087       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1088         indx1[i] = local_0_start*partial_dim + i;
1089         indx2[i] = j;
1090         if (k%dim[ndim-1]==0) j+=NM;
1091         j++;
1092       }
1093       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1094       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1095 
1096       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1097       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1098       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1100       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1101       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1102       ierr = PetscFree(indx1);CHKERRQ(ierr);
1103       ierr = PetscFree(indx2);CHKERRQ(ierr);
1104 #endif
1105       break;
1106     }
1107   }
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatCreate_FFTW"
1113 /*
1114     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1115 
1116   Options Database Keys:
1117 + -mat_fftw_plannerflags - set FFTW planner flags
1118 
1119    Level: intermediate
1120 
1121 */
1122 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1123 {
1124   PetscErrorCode ierr;
1125   MPI_Comm       comm;
1126   Mat_FFT        *fft=(Mat_FFT*)A->data;
1127   Mat_FFTW       *fftw;
1128   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1129   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1130   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1131   PetscBool      flg;
1132   PetscInt       p_flag,partial_dim=1,ctr;
1133   PetscMPIInt    size,rank;
1134   ptrdiff_t      *pdim;
1135   ptrdiff_t      local_n1,local_1_start;
1136 #if !defined(PETSC_USE_COMPLEX)
1137   ptrdiff_t temp;
1138   PetscInt  N1,tot_dim;
1139 #else
1140   PetscInt n1;
1141 #endif
1142 
1143   PetscFunctionBegin;
1144   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1145   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1146   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1147 
1148   fftw_mpi_init();
1149   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1150   pdim[0] = dim[0];
1151 #if !defined(PETSC_USE_COMPLEX)
1152   tot_dim = 2*dim[0];
1153 #endif
1154   for (ctr=1; ctr<ndim; ctr++) {
1155     partial_dim *= dim[ctr];
1156     pdim[ctr]    = dim[ctr];
1157 #if !defined(PETSC_USE_COMPLEX)
1158     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1159     else             tot_dim *= dim[ctr];
1160 #endif
1161   }
1162 
1163   if (size == 1) {
1164 #if defined(PETSC_USE_COMPLEX)
1165     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1166     n    = N;
1167 #else
1168     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1169     n    = tot_dim;
1170 #endif
1171 
1172   } else {
1173     ptrdiff_t local_n0,local_0_start;
1174     switch (ndim) {
1175     case 1:
1176 #if !defined(PETSC_USE_COMPLEX)
1177       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1178 #else
1179       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1180 
1181       n    = (PetscInt)local_n0;
1182       n1   = (PetscInt)local_n1;
1183       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1184 #endif
1185       break;
1186     case 2:
1187 #if defined(PETSC_USE_COMPLEX)
1188       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1189       /*
1190        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1191        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1192        */
1193       n    = (PetscInt)local_n0*dim[1];
1194       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1195 #else
1196       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1197 
1198       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1199       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1200 #endif
1201       break;
1202     case 3:
1203 #if defined(PETSC_USE_COMPLEX)
1204       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1205 
1206       n    = (PetscInt)local_n0*dim[1]*dim[2];
1207       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1208 #else
1209       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1210 
1211       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1212       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1213 #endif
1214       break;
1215     default:
1216 #if defined(PETSC_USE_COMPLEX)
1217       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1218 
1219       n    = (PetscInt)local_n0*partial_dim;
1220       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1221 #else
1222       temp = pdim[ndim-1];
1223 
1224       pdim[ndim-1] = temp/2 + 1;
1225 
1226       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1227 
1228       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1229       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1230 
1231       pdim[ndim-1] = temp;
1232 
1233       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1234 #endif
1235       break;
1236     }
1237   }
1238   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1239   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1240   fft->data = (void*)fftw;
1241 
1242   fft->n            = n;
1243   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1244   fftw->partial_dim = partial_dim;
1245 
1246   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1247   if (size == 1) {
1248     /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr);  error! */
1249     /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */
1250     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1251   }
1252 
1253   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1254 
1255   fftw->p_forward  = 0;
1256   fftw->p_backward = 0;
1257   fftw->p_flag     = FFTW_ESTIMATE;
1258 
1259   if (size == 1) {
1260     A->ops->mult          = MatMult_SeqFFTW;
1261     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1262   } else {
1263     A->ops->mult          = MatMult_MPIFFTW;
1264     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1265   }
1266   fft->matdestroy = MatDestroy_FFTW;
1267   A->assembled    = PETSC_TRUE;
1268   A->preallocated = PETSC_TRUE;
1269 
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1273 
1274   /* get runtime options */
1275   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1276   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1277   if (flg) {
1278     fftw->p_flag = iplans[p_flag];
1279   }
1280   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 
1285 
1286 
1287