xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b3a1736533330f0af4cf2d2850ef6b81ef8b3c71)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 extern PetscErrorCode InputTransformFFT_FFTW(Mat,Vec,Vec);
28 extern PetscErrorCode InputTransformFFT(Mat,Vec,Vec);
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMult_SeqFFTW"
32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33 {
34   PetscErrorCode ierr;
35   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37   PetscScalar    *x_array,*y_array;
38   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39 
40   PetscFunctionBegin;
41 #if !defined(PETSC_USE_COMPLEX)
42 
43   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
44 #endif
45   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
46   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
47   if (!fftw->p_forward){ /* create a plan, then excute it */
48     switch (ndim){
49     case 1:
50       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51       break;
52     case 2:
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54       break;
55     case 3:
56       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
57       break;
58     default:
59       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
60       break;
61     }
62     fftw->finarray  = x_array;
63     fftw->foutarray = y_array;
64     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
65                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
66     fftw_execute(fftw->p_forward);
67   } else { /* use existing plan */
68     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
69       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
70     } else {
71       fftw_execute(fftw->p_forward);
72     }
73   }
74   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
75   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
81 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
82 {
83   PetscErrorCode ierr;
84   Mat_FFT        *fft = (Mat_FFT*)A->data;
85   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
86   PetscScalar    *x_array,*y_array;
87   PetscInt       ndim=fft->ndim,*dim=fft->dim;
88 
89   PetscFunctionBegin;
90 #if !defined(PETSC_USE_COMPLEX)
91   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
92 #endif
93   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
94   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
95   if (!fftw->p_backward){ /* create a plan, then excute it */
96     switch (ndim){
97     case 1:
98       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
99       break;
100     case 2:
101       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
102       break;
103     case 3:
104       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
105       break;
106     default:
107       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108       break;
109     }
110     fftw->binarray  = x_array;
111     fftw->boutarray = y_array;
112     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
113   } else { /* use existing plan */
114     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
115       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
116     } else {
117       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
118     }
119   }
120   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
121   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatMult_MPIFFTW"
127 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
128 {
129   PetscErrorCode ierr;
130   Mat_FFT        *fft  = (Mat_FFT*)A->data;
131   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
132   PetscScalar    *x_array,*y_array;
133   PetscInt       ndim=fft->ndim,*dim=fft->dim;
134   MPI_Comm       comm=((PetscObject)A)->comm;
135 // PetscInt ctr;
136 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
137 //  ndim1=(ptrdiff_t) ndim;
138 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
139 
140 //  for(ctr=0;ctr<ndim;ctr++)
141 //     {
142 //      pdim[ctr] = dim[ctr];
143 //     }
144 
145   PetscFunctionBegin;
146 //#if !defined(PETSC_USE_COMPLEX)
147 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
148 //#endif
149 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
150 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
151 
152   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
153   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
154   if (!fftw->p_forward){ /* create a plan, then excute it */
155     switch (ndim){
156     case 1:
157 #if defined(PETSC_USE_COMPLEX)
158       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
159 #endif
160       break;
161     case 2:
162 #if defined(PETSC_USE_COMPLEX)
163       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
164 #else
165       printf("The code comes here \n");
166       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
167 #endif
168       break;
169     case 3:
170 #if defined(PETSC_USE_COMPLEX)
171       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
172 #else
173       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
174 #endif
175       break;
176     default:
177 #if defined(PETSC_USE_COMPLEX)
178       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
179 #else
180       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
181 #endif
182  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183       break;
184     }
185     fftw->finarray  = x_array;
186     fftw->foutarray = y_array;
187     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
188                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
189     fftw_execute(fftw->p_forward);
190   } else { /* use existing plan */
191     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
192       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
193     } else {
194       fftw_execute(fftw->p_forward);
195     }
196   }
197   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
198   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
204 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
205 {
206   PetscErrorCode ierr;
207   Mat_FFT        *fft  = (Mat_FFT*)A->data;
208   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
209   PetscScalar    *x_array,*y_array;
210   PetscInt       ndim=fft->ndim,*dim=fft->dim;
211   MPI_Comm       comm=((PetscObject)A)->comm;
212 //  PetscInt       ctr;
213 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
214 //  ndim1=(ptrdiff_t) ndim;
215 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
216 
217 //  for(ctr=0;ctr<ndim;ctr++)
218 //     {
219 //      pdim[ctr] = dim[ctr];
220 //     }
221 
222   PetscFunctionBegin;
223 //#if !defined(PETSC_USE_COMPLEX)
224 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
225 //#endif
226 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
227 // should pdim be a member of Mat_FFTW?
228 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
229 
230   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
231   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
232   if (!fftw->p_backward){ /* create a plan, then excute it */
233     switch (ndim){
234     case 1:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #endif
238       break;
239     case 2:
240 #if defined(PETSC_USE_COMPLEX)
241       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
242 #else
243       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
244 #endif
245       break;
246     case 3:
247 #if defined(PETSC_USE_COMPLEX)
248       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
249 #else
250       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
251 #endif
252       break;
253     default:
254 #if defined(PETSC_USE_COMPLEX)
255       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
256 #else
257       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
258 #endif
259 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
260       break;
261     }
262     fftw->binarray  = x_array;
263     fftw->boutarray = y_array;
264     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265   } else { /* use existing plan */
266     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
267       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
268     } else {
269       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
270     }
271   }
272   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
273   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatDestroy_FFTW"
279 PetscErrorCode MatDestroy_FFTW(Mat A)
280 {
281   Mat_FFT        *fft = (Mat_FFT*)A->data;
282   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286 #if !defined(PETSC_USE_COMPLEX)
287   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
288 #endif
289   fftw_destroy_plan(fftw->p_forward);
290   fftw_destroy_plan(fftw->p_backward);
291   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
292   ierr = PetscFree(fft->data);CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
297 #undef __FUNCT__
298 #define __FUNCT__ "VecDestroy_MPIFFTW"
299 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
300 {
301   PetscErrorCode  ierr;
302   PetscScalar     *array;
303 
304   PetscFunctionBegin;
305 #if !defined(PETSC_USE_COMPLEX)
306   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
307 #endif
308   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
309   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
310   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
311   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatGetVecs1DC_FFTW"
317 /*
318    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
319      parallel layout determined by FFTW-1D
320 
321 */
322 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
323 {
324   PetscErrorCode ierr;
325   PetscMPIInt    size,rank;
326   MPI_Comm       comm=((PetscObject)A)->comm;
327   Mat_FFT        *fft = (Mat_FFT*)A->data;
328 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
329   PetscInt       N=fft->N;
330   PetscInt       ndim=fft->ndim,*dim=fft->dim;
331   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
332   ptrdiff_t      f_local_n1,f_local_1_end;
333   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
334   ptrdiff_t      b_local_n1,b_local_1_end;
335   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
336 
337   PetscFunctionBegin;
338 #if !defined(PETSC_USE_COMPLEX)
339   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
340 #endif
341   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
342   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
343   if (size == 1){
344     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
345   }
346   else {
347       if (ndim>1){
348         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
349       else {
350           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
351           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
352           if (fin) {
353             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
354             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
355             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
356           }
357           if (fout) {
358             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
359             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
360             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
361           }
362           if (bin) {
363             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
364             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
365             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
366           }
367           if (bout) {
368             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
369             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
370             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
371           }
372   }
373   if (fin){
374     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
375   }
376   if (fout){
377     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
378   }
379   if (bin){
380     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
381   }
382   if (bout){
383     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
384   }
385   PetscFunctionReturn(0);
386 }
387 
388 
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatGetVecs_FFTW"
393 /*
394    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
395      parallel layout determined by FFTW
396 
397    Collective on Mat
398 
399    Input Parameter:
400 .  mat - the matrix
401 
402    Output Parameter:
403 +   fin - (optional) input vector of forward FFTW
404 -   fout - (optional) output vector of forward FFTW
405 
406   Level: advanced
407 
408 .seealso: MatCreateFFTW()
409 */
410 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
411 {
412   PetscErrorCode ierr;
413   PetscMPIInt    size,rank;
414   MPI_Comm       comm=((PetscObject)A)->comm;
415   Mat_FFT        *fft = (Mat_FFT*)A->data;
416   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
417   PetscInt       N=fft->N, N1, n1,vsize;
418 
419   PetscFunctionBegin;
420 //#if !defined(PETSC_USE_COMPLEX)
421 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
422 //#endif
423   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
424   PetscValidType(A,1);
425 
426   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
427   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
428   if (size == 1){ /* sequential case */
429     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
430     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
431     printf("The code successfully comes at the end of the routine with one processor\n");
432   } else {        /* mpi case */
433     ptrdiff_t      alloc_local,local_n0,local_0_start;
434     ptrdiff_t      local_n1,local_1_end;
435     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
436     fftw_complex   *data_fin,*data_fout;
437     double         *data_finr ;
438     ptrdiff_t      local_1_start,temp;
439 //    PetscInt ctr;
440 //    ptrdiff_t      ndim1,*pdim;
441 //    ndim1=(ptrdiff_t) ndim;
442 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
443 
444 //    for(ctr=0;ctr<ndim;ctr++)
445 //        {k
446 //           pdim[ctr] = dim[ctr];
447 //       }
448 
449 
450 
451     switch (ndim){
452     case 1:
453       /* Get local size */
454       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
455 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
456       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
457       if (fin) {
458         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
459         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
460         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
461       }
462       if (fout) {
463         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
464         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
465         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
466       }
467       break;
468     case 2:
469 #if !defined(PETSC_USE_COMPLEX)
470       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
471       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
472       if (fin) {
473         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
474         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
475         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
476         //printf("The code comes here with vector size %d\n",vsize);
477         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
478       }
479       if (fout) {
480         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
482         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
483       }
484       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
485 
486 #else
487       /* Get local size */
488      printf("Hope this does not come here");
489       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
490       if (fin) {
491         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
493         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
494       }
495       if (fout) {
496         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
498         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
499       }
500      printf("Hope this does not come here");
501 #endif
502       break;
503     case 3:
504       /* Get local size */
505 #if !defined(PETSC_USE_COMPLEX)
506       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
507       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
508       if (fin) {
509         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
510         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
511         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
512         //printf("The code comes here with vector size %d\n",vsize);
513         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
514       }
515       if (fout) {
516         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
518         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
519       }
520       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
521 
522 
523 #else
524       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
525 //      printf("The quantity n is %d",n);
526       if (fin) {
527         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
529         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
530       }
531       if (fout) {
532         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
534         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
535       }
536 #endif
537       break;
538     default:
539       /* Get local size */
540 #if !defined(PETSC_USE_COMPLEX)
541       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
542       printf("The value of temp is %ld\n",temp);
543       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
544       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
545       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
546       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
547       if (fin) {
548         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
549         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
550         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
551         //printf("The code comes here with vector size %d\n",vsize);
552         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
553       }
554       if (fout) {
555         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
556         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
557         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
558       }
559 
560 #else
561       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
562 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
563 //      printf("The value of alloc local is %d",alloc_local);
564 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
565 //      for(i=0;i<ndim;i++)
566 //         {
567 //          pdim[i]=dim[i];printf("%d",pdim[i]);
568 //         }
569 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
570 //      printf("The quantity n is %d",n);
571       if (fin) {
572         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
573         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
574         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
575       }
576       if (fout) {
577         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
578         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
579         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
580       }
581 #endif
582       break;
583     }
584   }
585 //  if (fin){
586 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
587 //  }
588 //  if (fout){
589 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
590 //  }
591   PetscFunctionReturn(0);
592 }
593 
594 //EXTERN_C_BEGIN - Do we need this?
595 #undef __FUNCT__
596 #define __FUNCT__ "InputTransformFFT"
597 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
598 {
599   PetscErrorCode ierr;
600   PetscFunctionBegin;
601   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 //EXTERN_C_END - Do we need this?
605 /*
606       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
607   Input A, x, y
608   A - FFTW matrix
609   x - user data
610   Options Database Keys:
611 + -mat_fftw_plannerflags - set FFTW planner flags
612 
613    Level: intermediate
614 
615 */
616 
617 EXTERN_C_BEGIN
618 #undef __FUNCT__
619 #define __FUNCT__ "InputTransformFFT_FTTW"
620 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
621 {
622   PetscErrorCode ierr;
623   MPI_Comm       comm=((PetscObject)A)->comm;
624   Mat_FFT        *fft = (Mat_FFT*)A->data;
625   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
626   PetscInt       N=fft->N, N1, n1 ,NM;
627   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
628   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
629   PetscInt       i,j,k,rank,size;
630   ptrdiff_t      alloc_local,local_n0,local_0_start;
631   ptrdiff_t      local_n1,local_1_start;
632   VecScatter     vecscat;
633   IS             list1,list2;
634 
635   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
636   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
637   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
638   printf("Local ownership starts at %d\n",low);
639 
640  switch (ndim){
641  case 1:
642   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
643   break;
644  case 2:
645       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
646       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
647 
648       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
649       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
650       printf("Val local_0_start = %ld\n",local_0_start);
651 
652       if (dim[1]%2==0)
653         NM = dim[1]+2;
654       else
655         NM = dim[1]+1;
656 
657       for (i=0;i<local_n0;i++){
658          for (j=0;j<dim[1];j++){
659             tempindx = i*dim[1] + j;
660             tempindx1 = i*NM + j;
661             indx1[tempindx]=local_0_start*dim[1]+tempindx;
662             indx2[tempindx]=low+tempindx1;
663            // printf("Val tempindx1 = %d\n",tempindx1);
664   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
665   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
666   //          printf("-------------------------\n",indx2[tempindx],rank);
667         }
668      }
669 
670       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
671       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
672 
673       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
674       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
676       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
677       break;
678 
679  case 3:
680       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
681       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
682 
683       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
684       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
685       printf("Val local_0_start = %ld\n",local_0_start);
686 
687       if (dim[2]%2==0)
688         NM = dim[2]+2;
689       else
690         NM = dim[2]+1;
691 
692       for (i=0;i<local_n0;i++){
693          for (j=0;j<dim[1];j++){
694             for (k=0;k<dim[2];k++){
695                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
696                tempindx1 = i*dim[1]*NM + j*NM + k;
697                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
698                indx2[tempindx]=low+tempindx1;
699             }
700            // printf("Val tempindx1 = %d\n",tempindx1);
701            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
702            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
703            // printf("-------------------------\n",indx2[tempindx],rank);
704         }
705      }
706 
707       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
708       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
709 
710       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
711       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
712       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
713       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
714       break;
715 
716  default:
717   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
718   break;
719  }
720 
721  return 0;
722 }
723 EXTERN_C_END
724 
725 
726 //EXTERN_C_BEGIN - Do we need this?
727 #undef __FUNCT__
728 #define __FUNCT__ "OutputTransformFFT"
729 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
730 {
731   PetscErrorCode ierr;
732   PetscFunctionBegin;
733   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 //EXTERN_C_END - Do we need this?
737 /*
738       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
739   Input A, x, y
740   A - FFTW matrix
741   x - FFTW vector
742   y - PETSc vector that user can read
743   Options Database Keys:
744 + -mat_fftw_plannerflags - set FFTW planner flags
745 
746    Level: intermediate
747 
748 */
749 
750 EXTERN_C_BEGIN
751 #undef __FUNCT__
752 #define __FUNCT__ "OutputTransformFFT_FTTW"
753 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
754 {
755   PetscErrorCode ierr;
756   MPI_Comm       comm=((PetscObject)A)->comm;
757   Mat_FFT        *fft = (Mat_FFT*)A->data;
758   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
759   PetscInt       N=fft->N, N1, n1 ,NM;
760   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
761   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
762   PetscInt       i,j,k,rank,size;
763   ptrdiff_t      alloc_local,local_n0,local_0_start;
764   ptrdiff_t      local_n1,local_1_start;
765   VecScatter     vecscat;
766   IS             list1,list2;
767 
768   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
769   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
770   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
771   printf("Local ownership starts at %d\n",low);
772 
773  switch (ndim){
774  case 1:
775   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
776   break;
777  case 2:
778       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
779       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
780 
781      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
782      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
783      printf("Val local_0_start = %ld\n",local_0_start);
784 
785      if (dim[1]%2==0)
786       NM = dim[1]+2;
787     else
788       NM = dim[1]+1;
789 
790 
791 
792      for (i=0;i<local_n0;i++){
793         for (j=0;j<dim[1];j++){
794             tempindx = i*dim[1] + j;
795             tempindx1 = i*NM + j;
796             indx1[tempindx]=local_0_start*dim[1]+tempindx;
797             indx2[tempindx]=low+tempindx1;
798        //     printf("Val tempindx1 = %d\n",tempindx1);
799        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
800        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
801        //     printf("-------------------------\n",indx2[tempindx],rank);
802         }
803      }
804 
805      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
806      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
807 
808      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
809      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
810      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
812   break;
813 
814  case 3:
815       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
816       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
817 
818      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
819      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
820      printf("Val local_0_start = %ld\n",local_0_start);
821 
822      if (dim[2]%2==0)
823       NM = dim[2]+2;
824     else
825       NM = dim[2]+1;
826 
827      for (i=0;i<local_n0;i++){
828         for (j=0;j<dim[1];j++){
829            for (k=0;k<dim[2];k++){
830               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
831               tempindx1 = i*dim[1]*NM + j*NM + k;
832               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
833               indx2[tempindx]=low+tempindx1;
834            }
835         //    printf("Val tempindx1 = %d\n",tempindx1);
836         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
837         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
838         //    printf("-------------------------\n",indx2[tempindx],rank);
839         }
840      }
841 
842      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
843      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
844 
845      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
846      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
847      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
849   break;
850 
851  default:
852   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
853   break;
854  }
855 
856  return 0;
857 }
858 EXTERN_C_END
859 
860 
861 
862 
863 EXTERN_C_BEGIN
864 #undef __FUNCT__
865 #define __FUNCT__ "MatCreate_FFTW"
866 /*
867       MatCreate_FFTW - Creates a matrix object that provides FFT
868   via the external package FFTW
869   Options Database Keys:
870 + -mat_fftw_plannerflags - set FFTW planner flags
871 
872    Level: intermediate
873 
874 */
875 
876 PetscErrorCode MatCreate_FFTW(Mat A)
877 {
878   PetscErrorCode ierr;
879   MPI_Comm       comm=((PetscObject)A)->comm;
880   Mat_FFT        *fft=(Mat_FFT*)A->data;
881   Mat_FFTW       *fftw;
882   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
883   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
884   PetscBool      flg;
885   PetscInt       p_flag,partial_dim=1,ctr,N1;
886   PetscMPIInt    size,rank;
887   ptrdiff_t      *pdim, temp;
888   ptrdiff_t      local_n1,local_1_start;
889 
890   PetscFunctionBegin;
891 //#if !defined(PETSC_USE_COMPLEX)
892 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
893 //#endif
894 
895   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
896   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
897   ierr = MPI_Barrier(PETSC_COMM_WORLD);
898 
899   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
900   pdim[0] = dim[0];
901   for(ctr=1;ctr<ndim;ctr++)
902       {
903           partial_dim *= dim[ctr];
904           pdim[ctr] = dim[ctr];
905       }
906 //#if !defined(PETSC_USE_COMPLEX)
907 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
908 //#endif
909 
910 //  printf("partial dimension is %d",partial_dim);
911   if (size == 1) {
912     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
913     n = N;
914   } else {
915     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
916     switch (ndim){
917     case 1:
918 #if !defined(PETSC_USE_COMPLEX)
919   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
920 #endif
921       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
922       n = (PetscInt)local_n0;
923       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
924 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
925       break;
926     case 2:
927 #if defined(PETSC_USE_COMPLEX)
928       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
929       /*
930        PetscMPIInt    rank;
931        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
932        PetscSynchronizedFlush(comm);
933        */
934       n = (PetscInt)local_n0*dim[1];
935       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
936 #else
937       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
938       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
939       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
940 #endif
941       break;
942     case 3:
943 //      printf("The value of alloc local is %d",alloc_local);
944 #if defined(PETSC_USE_COMPLEX)
945       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
946       n = (PetscInt)local_n0*dim[1]*dim[2];
947       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
948 #else
949       printf("The code comes here\n");
950       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
951       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
952       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
953 #endif
954       break;
955     default:
956 #if defined(PETSC_USE_COMPLEX)
957       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
958 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
959 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
960       n = (PetscInt)local_n0*partial_dim;
961 //      printf("New partial dimension is %d %d %d",n,N,ndim);
962       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
963 #else
964       temp = pdim[ndim-1];
965       pdim[ndim-1]= temp/2 + 1;
966       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
967       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
968       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
969       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
970       pdim[ndim-1] = temp;
971       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
972       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
973 #endif
974       break;
975     }
976   }
977   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
978   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
979   fft->data = (void*)fftw;
980 
981   fft->n           = n;
982   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
983   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
984   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
985 
986   fftw->p_forward  = 0;
987   fftw->p_backward = 0;
988   fftw->p_flag     = FFTW_ESTIMATE;
989 
990   if (size == 1){
991     A->ops->mult          = MatMult_SeqFFTW;
992     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
993   } else {
994     A->ops->mult          = MatMult_MPIFFTW;
995     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
996   }
997   fft->matdestroy          = MatDestroy_FFTW;
998   A->ops->getvecs       = MatGetVecs_FFTW;
999   A->assembled          = PETSC_TRUE;
1000 #if !defined(PETSC_USE_COMPLEX)
1001   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1002   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1003 #endif
1004 
1005   /* get runtime options */
1006   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1007     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1008     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1009   PetscOptionsEnd();
1010   PetscFunctionReturn(0);
1011 }
1012 EXTERN_C_END
1013 
1014 
1015 
1016 
1017