xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 07703244eb00794e20eb3f0b2f7f7fad9229c214)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*);
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMult_SeqFFTW"
32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33 {
34   PetscErrorCode ierr;
35   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37   PetscScalar    *x_array,*y_array;
38   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39 
40   PetscFunctionBegin;
41   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43   if (!fftw->p_forward){ /* create a plan, then excute it */
44     switch (ndim){
45     case 1:
46 #if defined(PETSC_USE_COMPLEX)
47       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
48 #else
49       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
50 #endif
51       break;
52     case 2:
53 #if defined(PETSC_USE_COMPLEX)
54       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55 #else
56       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
57 #endif
58       break;
59     case 3:
60 #if defined(PETSC_USE_COMPLEX)
61       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
62 #else
63       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
64 #endif
65       break;
66     default:
67 #if defined(PETSC_USE_COMPLEX)
68       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
69 #else
70       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
71 #endif
72       break;
73     }
74     fftw->finarray  = x_array;
75     fftw->foutarray = y_array;
76     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
77                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
78     fftw_execute(fftw->p_forward);
79   } else { /* use existing plan */
80     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
81       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
82     } else {
83       fftw_execute(fftw->p_forward);
84     }
85   }
86   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
87   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
93 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
94 {
95   PetscErrorCode ierr;
96   Mat_FFT        *fft = (Mat_FFT*)A->data;
97   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
98   PetscScalar    *x_array,*y_array;
99   PetscInt       ndim=fft->ndim,*dim=fft->dim;
100 
101   PetscFunctionBegin;
102   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
103   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
104   if (!fftw->p_backward){ /* create a plan, then excute it */
105     switch (ndim){
106     case 1:
107 #if defined(PETSC_USE_COMPLEX)
108       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
109 #else
110       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
111 #endif
112       break;
113     case 2:
114 #if defined(PETSC_USE_COMPLEX)
115       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
116 #else
117       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
118 #endif
119       break;
120     case 3:
121 #if defined(PETSC_USE_COMPLEX)
122       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
123 #else
124       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
125 #endif
126       break;
127     default:
128 #if defined(PETSC_USE_COMPLEX)
129       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
130 #else
131       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
132 #endif
133       break;
134     }
135     fftw->binarray  = x_array;
136     fftw->boutarray = y_array;
137     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
138   } else { /* use existing plan */
139     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
140       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
141     } else {
142       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
143     }
144   }
145   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
146   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatMult_MPIFFTW"
152 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
153 {
154   PetscErrorCode ierr;
155   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157   PetscScalar    *x_array,*y_array;
158   PetscInt       ndim=fft->ndim,*dim=fft->dim;
159   MPI_Comm       comm=((PetscObject)A)->comm;
160 
161   PetscFunctionBegin;
162   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
163   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164   if (!fftw->p_forward){ /* create a plan, then excute it */
165     switch (ndim){
166     case 1:
167 #if defined(PETSC_USE_COMPLEX)
168       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169 #else
170       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
171 #endif
172       break;
173     case 2:
174 #if defined(PETSC_USE_COMPLEX)
175       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
176 #else
177       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
178 #endif
179       break;
180     case 3:
181 #if defined(PETSC_USE_COMPLEX)
182       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183 #else
184       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
185 #endif
186       break;
187     default:
188 #if defined(PETSC_USE_COMPLEX)
189       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
190 #else
191       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
192 #endif
193       break;
194     }
195     fftw->finarray  = x_array;
196     fftw->foutarray = y_array;
197     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
198                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
199     fftw_execute(fftw->p_forward);
200   } else { /* use existing plan */
201     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
202       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
203     } else {
204       fftw_execute(fftw->p_forward);
205     }
206   }
207   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
208   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
214 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
215 {
216   PetscErrorCode ierr;
217   Mat_FFT        *fft  = (Mat_FFT*)A->data;
218   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
219   PetscScalar    *x_array,*y_array;
220   PetscInt       ndim=fft->ndim,*dim=fft->dim;
221   MPI_Comm       comm=((PetscObject)A)->comm;
222 
223   PetscFunctionBegin;
224   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
225   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
226   if (!fftw->p_backward){ /* create a plan, then excute it */
227     switch (ndim){
228     case 1:
229 #if defined(PETSC_USE_COMPLEX)
230       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
231 #else
232       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
233 #endif
234       break;
235     case 2:
236 #if defined(PETSC_USE_COMPLEX)
237       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
238 #else
239       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
240 #endif
241       break;
242     case 3:
243 #if defined(PETSC_USE_COMPLEX)
244       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
245 #else
246       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
247 #endif
248       break;
249     default:
250 #if defined(PETSC_USE_COMPLEX)
251       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
252 #else
253       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
254 #endif
255       break;
256     }
257     fftw->binarray  = x_array;
258     fftw->boutarray = y_array;
259     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
260   } else { /* use existing plan */
261     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
262       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
263     } else {
264       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265     }
266   }
267   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
268   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatDestroy_FFTW"
274 PetscErrorCode MatDestroy_FFTW(Mat A)
275 {
276   Mat_FFT        *fft = (Mat_FFT*)A->data;
277   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   fftw_destroy_plan(fftw->p_forward);
282   fftw_destroy_plan(fftw->p_backward);
283   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
284   ierr = PetscFree(fft->data);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
289 #undef __FUNCT__
290 #define __FUNCT__ "VecDestroy_MPIFFTW"
291 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
292 {
293   PetscErrorCode  ierr;
294   PetscScalar     *array;
295 
296   PetscFunctionBegin;
297   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
298   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
299   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
300   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatGetVecs1DC_FFTW"
306 /*
307    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
308      parallel layout determined by FFTW-1D
309 
310 */
311 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
312 {
313   PetscErrorCode ierr;
314   PetscMPIInt    size,rank;
315   MPI_Comm       comm=((PetscObject)A)->comm;
316   Mat_FFT        *fft = (Mat_FFT*)A->data;
317 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318   PetscInt       N=fft->N;
319   PetscInt       ndim=fft->ndim,*dim=fft->dim;
320   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
321   ptrdiff_t      f_local_n1,f_local_1_end;
322   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
323   ptrdiff_t      b_local_n1,b_local_1_end;
324   fftw_complex   *data_fin,*data_fout,*data_bout;
325 
326   PetscFunctionBegin;
327 #if !defined(PETSC_USE_COMPLEX)
328   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
329 #endif
330   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
331   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
332   if (size == 1){
333     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
334   }
335   else {
336       if (ndim>1){
337         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
338       else {
339           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
340           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
341           if (fin) {
342             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
343             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
344             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
345           }
346           if (fout) {
347             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
348             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
349             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
350           }
351           if (bout) {
352             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
353             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
354             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
355           }
356   }
357   if (fin){
358     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
359   }
360   if (fout){
361     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
362   }
363   if (bout){
364     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 
370 }
371 
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatGetVecsFFT"
375 PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z)
376 {
377   PetscErrorCode ierr;
378   PetscFunctionBegin;
379   ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatGetVecsFFT_FFTW"
385 /*
386    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
387      parallel layout determined by FFTW
388 
389    Collective on Mat
390 
391    Input Parameter:
392 .  mat - the matrix
393 
394    Output Parameter:
395 +   fin - (optional) input vector of forward FFTW
396 -   fout - (optional) output vector of forward FFTW
397 
398   Level: advanced
399 
400 .seealso: MatCreateFFTW()
401 */
402 EXTERN_C_BEGIN
403 PetscErrorCode  MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
404 {
405   PetscErrorCode ierr;
406   PetscMPIInt    size,rank;
407   MPI_Comm       comm=((PetscObject)A)->comm;
408   Mat_FFT        *fft = (Mat_FFT*)A->data;
409   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
410   PetscInt       N=fft->N, N1, n1,vsize;
411   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
412 
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
415   PetscValidType(A,1);
416 
417   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
418   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
419   printf("size is %d\n",size);
420   if (size == 1){ /* sequential case */
421   printf("Routine is getting called\n");
422 #if defined(PETSC_USE_COMPLEX)
423     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
424     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
425     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
426 #else
427     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
428     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
429     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
430 #endif
431   } else {
432     ptrdiff_t      alloc_local,local_n0,local_0_start;
433     ptrdiff_t      local_n1,local_1_end;
434     fftw_complex   *data_fin,*data_fout,data_bout;
435     double         *data_finr,*data_boutr ;
436     ptrdiff_t      local_1_start,temp;
437     switch (ndim){
438           case 1:
439                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
440           case 2:
441 #if !defined(PETSC_USE_COMPLEX)
442                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
443                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
444                  if (fin) {
445                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
446                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
447                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
448                            //printf("The code comes here with vector size %d\n",vsize);
449                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
450                           }
451                  if (fout) {
452                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
453                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
454                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
455                            }
456                  if (bout) {
457                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
458                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
459                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
460                            //printf("The code comes here with vector size %d\n",vsize);
461                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
462                           }
463 
464                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
465 
466 #else
467       /* Get local size */
468                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
469                  if (fin) {
470                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
471                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
472                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
473                           }
474                  if (fout) {
475                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
476                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
477                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
478                            }
479                  if (bout) {
480                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
482                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
483                           }
484 
485      //printf("Hope this does not come here");
486 #endif
487       break;
488 
489           case 3:
490 #if !defined(PETSC_USE_COMPLEX)
491                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
492                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
493                  if (fin) {
494                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
495                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
496                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
497         //printf("The code comes here with vector size %d\n",vsize);
498                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
499                          }
500                  if (fout) {
501                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
502                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
503                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
504                           }
505                  if (bout) {
506                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
507                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
508                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
509         //printf("The code comes here with vector size %d\n",vsize);
510                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
511                           }
512 
513       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
514 
515 #else
516                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
517 #endif
518                  break;
519           default:
520 #if !defined(PETSC_USE_COMPLEX)
521                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
522                  printf("The value of temp is %ld\n",temp);
523                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
524                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
525                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
526                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
527 
528                  if (fin) {
529                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
530                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
531                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
532         //printf("The code comes here with vector size %d\n",vsize);
533                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
534                         }
535                  if (fout) {
536                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
538                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539                         }
540                  if (bout) {
541                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
542                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
543                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
544         //printf("The code comes here with vector size %d\n",vsize);
545                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
546                         }
547 
548 #else
549 
550                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
551 #endif
552             break;
553           }
554   }
555   PetscFunctionReturn(0);
556 }
557 EXTERN_C_END
558 
559 
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatGetVecs_FFTW"
563 /*
564    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
565      parallel layout determined by FFTW
566 
567    Collective on Mat
568 
569    Input Parameter:
570 .  mat - the matrix
571 
572    Output Parameter:
573 +   fin - (optional) input vector of forward FFTW
574 -   fout - (optional) output vector of forward FFTW
575 
576   Level: advanced
577 
578 .seealso: MatCreateFFTW()
579 */
580 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
581 {
582   PetscErrorCode ierr;
583   PetscMPIInt    size,rank;
584   MPI_Comm       comm=((PetscObject)A)->comm;
585   Mat_FFT        *fft = (Mat_FFT*)A->data;
586   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
587   PetscInt       N=fft->N, N1, n1,vsize;
588   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
589 
590   PetscFunctionBegin;
591   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
592   PetscValidType(A,1);
593 
594   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
595   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
596   if (size == 1){ /* sequential case */
597 #if defined(PETSC_USE_COMPLEX)
598     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
599     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
600 #else
601     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
602     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
603 #endif
604   } else {        /* mpi case */
605     ptrdiff_t      alloc_local,local_n0,local_0_start;
606     ptrdiff_t      local_n1,local_1_end;
607     fftw_complex   *data_fin,*data_fout;
608     double         *data_finr ;
609     ptrdiff_t      local_1_start,temp;
610 //    PetscInt ctr;
611 //    ptrdiff_t      ndim1,*pdim;
612 //    ndim1=(ptrdiff_t) ndim;
613 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
614 
615 //    for(ctr=0;ctr<ndim;ctr++)
616 //        {k
617 //           pdim[ctr] = dim[ctr];
618 //       }
619 
620 
621 
622     switch (ndim){
623     case 1:
624       /* Get local size */
625       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
626 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
627       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
628       if (fin) {
629         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
630         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
631         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
632       }
633       if (fout) {
634         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
635         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
636         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
637       }
638       break;
639     case 2:
640 #if !defined(PETSC_USE_COMPLEX)
641       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
642       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
643       if (fin) {
644         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
645         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
646         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
647         //printf("The code comes here with vector size %d\n",vsize);
648         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
649       }
650       if (fout) {
651         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
652         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
653         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
654       }
655       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
656 
657 #else
658       /* Get local size */
659      //printf("Hope this does not come here");
660       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
661       if (fin) {
662         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
663         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
664         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
665       }
666       if (fout) {
667         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
668         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
669         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
670       }
671      //printf("Hope this does not come here");
672 #endif
673       break;
674     case 3:
675       /* Get local size */
676 #if !defined(PETSC_USE_COMPLEX)
677       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
678       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
679       if (fin) {
680         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
681         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
682         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
683         //printf("The code comes here with vector size %d\n",vsize);
684         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
685       }
686       if (fout) {
687         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
688         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
689         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
690       }
691       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
692 
693 
694 #else
695       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
696 //      printf("The quantity n is %d",n);
697       if (fin) {
698         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
699         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
700         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
701       }
702       if (fout) {
703         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
704         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
705         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
706       }
707 #endif
708       break;
709     default:
710       /* Get local size */
711 #if !defined(PETSC_USE_COMPLEX)
712       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
713       printf("The value of temp is %ld\n",temp);
714       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
715       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
716       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
717       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
718       if (fin) {
719         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
720         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
721         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
722         //printf("The code comes here with vector size %d\n",vsize);
723         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
724       }
725       if (fout) {
726         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
727         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
728         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
729       }
730 
731 #else
732       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
733 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
734 //      printf("The value of alloc local is %d",alloc_local);
735 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
736 //      for(i=0;i<ndim;i++)
737 //         {
738 //          pdim[i]=dim[i];printf("%d",pdim[i]);
739 //         }
740 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
741 //      printf("The quantity n is %d",n);
742       if (fin) {
743         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
744         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
745         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
746       }
747       if (fout) {
748         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
749         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
750         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
751       }
752 #endif
753       break;
754     }
755   }
756 //  if (fin){
757 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
758 //  }
759 //  if (fout){
760 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
761 //  }
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "InputTransformFFT"
767 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
768 {
769   PetscErrorCode ierr;
770   PetscFunctionBegin;
771   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 /*
776       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
777   Input A, x, y
778   A - FFTW matrix
779   x - user data
780   Options Database Keys:
781 + -mat_fftw_plannerflags - set FFTW planner flags
782 
783    Level: intermediate
784 
785 */
786 
787 EXTERN_C_BEGIN
788 #undef __FUNCT__
789 #define __FUNCT__ "InputTransformFFT_FTTW"
790 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
791 {
792   PetscErrorCode ierr;
793   MPI_Comm       comm=((PetscObject)A)->comm;
794   Mat_FFT        *fft = (Mat_FFT*)A->data;
795   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
796   PetscInt       N=fft->N, N1, n1 ,NM;
797   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
798   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
799   PetscInt       i,j,k,rank,size,partial_dim;
800   ptrdiff_t      alloc_local,local_n0,local_0_start;
801   ptrdiff_t      local_n1,local_1_start,temp;
802   VecScatter     vecscat;
803   IS             list1,list2;
804 
805   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
806   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
807   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
808   printf("Local ownership starts at %d\n",low);
809 
810   if (size==1)
811     {
812 /*     switch (ndim){
813      case 1:
814           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
815           for (i=0;i<dim[0];i++)
816              {
817               indx1[i] = i;
818              }
819           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
820           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
821           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
824           ierr = ISDestroy(&list1);CHKERRQ(ierr);
825           ierr = PetscFree(indx1);CHKERRQ(ierr);
826      break;
827 
828      case 2:
829           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
830           for (i=0;i<dim[0];i++){
831              for (j=0;j<dim[1];j++){
832                 indx1[i*dim[1]+j] = i*dim[1] + j;
833              }
834           }
835           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
836           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
837           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
840           ierr = ISDestroy(&list1);CHKERRQ(ierr);
841           ierr = PetscFree(indx1);CHKERRQ(ierr);
842           break;
843      case 3:
844           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
845           for (i=0;i<dim[0];i++){
846              for (j=0;j<dim[1];j++){
847                 for (k=0;k<dim[2];k++){
848                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
849                 }
850              }
851           }
852           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
853           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
854           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
857           ierr = ISDestroy(&list1);CHKERRQ(ierr);
858           ierr = PetscFree(indx1);CHKERRQ(ierr);
859           break;
860      default:
861 */
862           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
863           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
864           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
867           ierr = ISDestroy(&list1);CHKERRQ(ierr);
868           //ierr = ISDestroy(list1);CHKERRQ(ierr);
869  //         break;
870   //    }
871     }
872 
873  else{
874 
875  switch (ndim){
876  case 1:
877 #if defined(PETSC_USE_COMPLEX)
878       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
879       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
880       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
881       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
882       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
883       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
884       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
887       ierr = ISDestroy(&list1);CHKERRQ(ierr);
888       ierr = ISDestroy(&list2);CHKERRQ(ierr);
889       break;
890 #else
891   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
892 #endif
893   break;
894  case 2:
895 #if defined(PETSC_USE_COMPLEX)
896       //PetscInt my_value;
897       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
898       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
899       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
900       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
901       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
902       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
903       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
906       ierr = ISDestroy(&list1);CHKERRQ(ierr);
907       ierr = ISDestroy(&list2);CHKERRQ(ierr);
908       break;
909 #else
910       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
911       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
912 
913       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
914       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
915       printf("Val local_0_start = %ld\n",local_0_start);
916 
917       if (dim[1]%2==0)
918         NM = dim[1]+2;
919       else
920         NM = dim[1]+1;
921 
922       for (i=0;i<local_n0;i++){
923          for (j=0;j<dim[1];j++){
924             tempindx = i*dim[1] + j;
925             tempindx1 = i*NM + j;
926             indx1[tempindx]=local_0_start*dim[1]+tempindx;
927             indx2[tempindx]=low+tempindx1;
928            // printf("Val tempindx1 = %d\n",tempindx1);
929   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
930   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
931   //          printf("-------------------------\n",indx2[tempindx],rank);
932         }
933      }
934 
935       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
936       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
937 
938       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
939       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
942       ierr = ISDestroy(&list1);CHKERRQ(ierr);
943       ierr = ISDestroy(&list2);CHKERRQ(ierr);
944       ierr = PetscFree(indx1);CHKERRQ(ierr);
945       ierr = PetscFree(indx2);CHKERRQ(ierr);
946       break;
947 #endif
948 
949  case 3:
950       #if defined(PETSC_USE_COMPLEX)
951       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
952       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
953       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
954       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
955       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
956       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
957       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
960       ierr = ISDestroy(&list1);CHKERRQ(ierr);
961       ierr = ISDestroy(&list2);CHKERRQ(ierr);
962       break;
963 #else
964       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
965       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
966 
967       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
968       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
969       printf("Val local_0_start = %ld\n",local_0_start);
970 
971       if (dim[2]%2==0)
972         NM = dim[2]+2;
973       else
974         NM = dim[2]+1;
975 
976       for (i=0;i<local_n0;i++){
977          for (j=0;j<dim[1];j++){
978             for (k=0;k<dim[2];k++){
979                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
980                tempindx1 = i*dim[1]*NM + j*NM + k;
981                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
982                indx2[tempindx]=low+tempindx1;
983             }
984            // printf("Val tempindx1 = %d\n",tempindx1);
985            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
986            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
987            // printf("-------------------------\n",indx2[tempindx],rank);
988         }
989      }
990 
991       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
992       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
993 
994       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
995       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
998       ierr = ISDestroy(&list1);CHKERRQ(ierr);
999       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1000       ierr = PetscFree(indx1);CHKERRQ(ierr);
1001       ierr = PetscFree(indx2);CHKERRQ(ierr);
1002       break;
1003 #endif
1004 
1005  default:
1006 #if defined(PETSC_USE_COMPLEX)
1007       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1008       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1009       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1010       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1011       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1012       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1013       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1016       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1017       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1018 
1019 
1020 #else
1021       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1022       printf("The value of temp is %ld\n",temp);
1023       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1024       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1025       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1026       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1027 
1028       partial_dim = fftw->partial_dim;
1029       printf("The value of partial dim is %d\n",partial_dim);
1030 
1031       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1032       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1033       printf("Val local_0_start = %ld\n",local_0_start);
1034 
1035       if (dim[ndim-1]%2==0)
1036         NM = 2;
1037       else
1038         NM = 1;
1039 
1040       j = low;
1041       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1042          {
1043           indx1[i] = local_0_start*partial_dim + i;
1044           indx2[i] = j;
1045           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1046           if (k%dim[ndim-1]==0)
1047             { j+=NM;}
1048           j++;
1049          }
1050       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1051       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1052       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1053       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058       ierr = PetscFree(indx1);CHKERRQ(ierr);
1059       ierr = PetscFree(indx2);CHKERRQ(ierr);
1060       break;
1061 #endif
1062   }
1063  }
1064 
1065  return 0;
1066 }
1067 EXTERN_C_END
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "OutputTransformFFT"
1071 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1072 {
1073   PetscErrorCode ierr;
1074   PetscFunctionBegin;
1075   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*
1080       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1081   Input A, x, y
1082   A - FFTW matrix
1083   x - FFTW vector
1084   y - PETSc vector that user can read
1085   Options Database Keys:
1086 + -mat_fftw_plannerflags - set FFTW planner flags
1087 
1088    Level: intermediate
1089 
1090 */
1091 
1092 EXTERN_C_BEGIN
1093 #undef __FUNCT__
1094 #define __FUNCT__ "OutputTransformFFT_FTTW"
1095 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1096 {
1097   PetscErrorCode ierr;
1098   MPI_Comm       comm=((PetscObject)A)->comm;
1099   Mat_FFT        *fft = (Mat_FFT*)A->data;
1100   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1101   PetscInt       N=fft->N, N1, n1 ,NM;
1102   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1103   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1104   PetscInt       i,j,k,rank,size,partial_dim;
1105   ptrdiff_t      alloc_local,local_n0,local_0_start;
1106   ptrdiff_t      local_n1,local_1_start,temp;
1107   VecScatter     vecscat;
1108   IS             list1,list2;
1109 
1110   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1111   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1112   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
1113 
1114   if (size==1){
1115 /*
1116     switch (ndim){
1117     case 1:
1118            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1119           for (i=0;i<dim[0];i++)
1120              {
1121               indx1[i] = i;
1122              }
1123           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1124           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1125           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1128           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1129           ierr = PetscFree(indx1);CHKERRQ(ierr);
1130           break;
1131 
1132     case 2:
1133          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1134           for (i=0;i<dim[0];i++){
1135              for (j=0;j<dim[1];j++){
1136                 indx1[i*dim[1]+j] = i*dim[1] + j;
1137              }
1138           }
1139          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1140          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1141          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1144          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1145          ierr = PetscFree(indx1);CHKERRQ(ierr);
1146          break;
1147     case 3:
1148          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1149          for (i=0;i<dim[0];i++){
1150             for (j=0;j<dim[1];j++){
1151                for (k=0;k<dim[2];k++){
1152                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1153                }
1154             }
1155          }
1156          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1157          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1158          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1161          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1162          ierr = PetscFree(indx1);CHKERRQ(ierr);
1163          break;
1164     default:
1165 */
1166          ierr = ISCreateStride(comm,N,0,1,&list1);
1167          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1168          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1169          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1172          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1173   //       break;
1174    // }
1175   }
1176   else{
1177 
1178  switch (ndim){
1179  case 1:
1180 #if defined(PETSC_USE_COMPLEX)
1181       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1182       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
1183       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
1184       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1185       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1186       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1187       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1188       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1189       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1190       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1191       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1192       break;
1193 
1194 #else
1195       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1196 #endif
1197   break;
1198  case 2:
1199 #if defined(PETSC_USE_COMPLEX)
1200       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1201       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1202       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1203       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1204       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1207       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1208       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1209       break;
1210 #else
1211       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1212       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1213 
1214      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1215      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1216      printf("Val local_0_start = %ld\n",local_0_start);
1217 
1218      if (dim[1]%2==0)
1219       NM = dim[1]+2;
1220     else
1221       NM = dim[1]+1;
1222 
1223 
1224 
1225      for (i=0;i<local_n0;i++){
1226         for (j=0;j<dim[1];j++){
1227             tempindx = i*dim[1] + j;
1228             tempindx1 = i*NM + j;
1229             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1230             indx2[tempindx]=low+tempindx1;
1231        //     printf("Val tempindx1 = %d\n",tempindx1);
1232        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1233        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1234        //     printf("-------------------------\n",indx2[tempindx],rank);
1235         }
1236      }
1237 
1238      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1239      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1240 
1241      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1242      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1245      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1246      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1247      ierr = PetscFree(indx1);CHKERRQ(ierr);
1248      ierr = PetscFree(indx2);CHKERRQ(ierr);
1249   break;
1250 #endif
1251  case 3:
1252 #if defined(PETSC_USE_COMPLEX)
1253       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1254       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1255       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1256       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1257       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1258       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1259       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1262       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1263       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1264       break;
1265 #else
1266 
1267       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1268       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1269 
1270      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1271      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1272      printf("Val local_0_start = %ld\n",local_0_start);
1273 
1274      if (dim[2]%2==0)
1275       NM = dim[2]+2;
1276     else
1277       NM = dim[2]+1;
1278 
1279      for (i=0;i<local_n0;i++){
1280         for (j=0;j<dim[1];j++){
1281            for (k=0;k<dim[2];k++){
1282               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1283               tempindx1 = i*dim[1]*NM + j*NM + k;
1284               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1285               indx2[tempindx]=low+tempindx1;
1286            }
1287         //    printf("Val tempindx1 = %d\n",tempindx1);
1288         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1289         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1290         //    printf("-------------------------\n",indx2[tempindx],rank);
1291         }
1292      }
1293 
1294      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1295      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1296 
1297      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1298      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1299      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1300      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1301      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1302      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1303      ierr = PetscFree(indx1);CHKERRQ(ierr);
1304      ierr = PetscFree(indx2);CHKERRQ(ierr);
1305   break;
1306 #endif
1307   default:
1308 #if defined(PETSC_USE_COMPLEX)
1309       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1310       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1311       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1312       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1313       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1314       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1315       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1316       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1318       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1319       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1320 #else
1321      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1322      printf("The value of temp is %ld\n",temp);
1323      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1324      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1325      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1326      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1327 
1328      partial_dim = fftw->partial_dim;
1329      printf("The value of partial dim is %d\n",partial_dim);
1330 
1331      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1332      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1333      printf("Val local_0_start = %ld\n",local_0_start);
1334 
1335      if (dim[ndim-1]%2==0)
1336        NM = 2;
1337      else
1338        NM = 1;
1339 
1340      j = low;
1341      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1342         {
1343          indx1[i] = local_0_start*partial_dim + i;
1344          indx2[i] = j;
1345          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1346          if (k%dim[ndim-1]==0)
1347            { j+=NM;}
1348          j++;
1349         }
1350      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1351      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1352 
1353       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1354 
1355 
1356      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1357      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1358      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1359      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1360      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1361      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1362      ierr = PetscFree(indx1);CHKERRQ(ierr);
1363      ierr = PetscFree(indx2);CHKERRQ(ierr);
1364 
1365      break;
1366 #endif
1367  }
1368  }
1369  return 0;
1370 }
1371 EXTERN_C_END
1372 
1373 EXTERN_C_BEGIN
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatCreate_FFTW"
1376 /*
1377       MatCreate_FFTW - Creates a matrix object that provides FFT
1378   via the external package FFTW
1379   Options Database Keys:
1380 + -mat_fftw_plannerflags - set FFTW planner flags
1381 
1382    Level: intermediate
1383 
1384 */
1385 
1386 PetscErrorCode MatCreate_FFTW(Mat A)
1387 {
1388   PetscErrorCode ierr;
1389   MPI_Comm       comm=((PetscObject)A)->comm;
1390   Mat_FFT        *fft=(Mat_FFT*)A->data;
1391   Mat_FFTW       *fftw;
1392   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1393   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1394   PetscBool      flg;
1395   PetscInt       p_flag,partial_dim=1,ctr,N1;
1396   PetscMPIInt    size,rank;
1397   ptrdiff_t      *pdim, temp;
1398   ptrdiff_t      local_n1,local_1_start,local_1_end;
1399 
1400   PetscFunctionBegin;
1401   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1402   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1403   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1404 
1405   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1406   pdim[0] = dim[0];
1407   for(ctr=1;ctr<ndim;ctr++)
1408       {
1409           partial_dim *= dim[ctr];
1410           pdim[ctr] = dim[ctr];
1411       }
1412 
1413   if (size == 1) {
1414 #if defined(PETSC_USE_COMPLEX)
1415     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1416     n = N;
1417 #else
1418     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1419     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1420     n = tot_dim;
1421 #endif
1422 
1423   } else {
1424     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1425     switch (ndim){
1426     case 1:
1427 #if !defined(PETSC_USE_COMPLEX)
1428   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1429 #else
1430       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1431       n = (PetscInt)local_n0;
1432       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1433 #endif
1434 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1435       break;
1436     case 2:
1437 #if defined(PETSC_USE_COMPLEX)
1438       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1439       /*
1440        PetscMPIInt    rank;
1441        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1442        PetscSynchronizedFlush(comm);
1443        */
1444       n = (PetscInt)local_n0*dim[1];
1445       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1446 #else
1447       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1448       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1449       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1450 #endif
1451       break;
1452     case 3:
1453 //      printf("The value of alloc local is %d",alloc_local);
1454 #if defined(PETSC_USE_COMPLEX)
1455       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1456       n = (PetscInt)local_n0*dim[1]*dim[2];
1457       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1458 #else
1459       printf("The code comes here\n");
1460       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1461       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1462       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1463 #endif
1464       break;
1465     default:
1466 #if defined(PETSC_USE_COMPLEX)
1467       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1468 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1469 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1470       n = (PetscInt)local_n0*partial_dim;
1471 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1472       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1473 #else
1474       temp = pdim[ndim-1];
1475       pdim[ndim-1]= temp/2 + 1;
1476       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1477       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1478       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1479       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1480       pdim[ndim-1] = temp;
1481       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1482       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1483 #endif
1484       break;
1485     }
1486   }
1487   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1488   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1489   fft->data = (void*)fftw;
1490 
1491   fft->n           = n;
1492   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1493   fftw->partial_dim = partial_dim;
1494   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1495   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1496 
1497   fftw->p_forward  = 0;
1498   fftw->p_backward = 0;
1499   fftw->p_flag     = FFTW_ESTIMATE;
1500 
1501   if (size == 1){
1502     A->ops->mult          = MatMult_SeqFFTW;
1503     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1504   } else {
1505     A->ops->mult          = MatMult_MPIFFTW;
1506     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1507   }
1508   fft->matdestroy          = MatDestroy_FFTW;
1509 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1510   A->ops->getvecs       = MatGetVecs_FFTW;
1511   A->assembled          = PETSC_TRUE;
1512   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW);
1513   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1514   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1515 
1516   /* get runtime options */
1517   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1518     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1519     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1520   PetscOptionsEnd();
1521   PetscFunctionReturn(0);
1522 }
1523 EXTERN_C_END
1524 
1525 
1526 
1527 
1528