xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 64657d84d43e0896dae809bb9337e08748670383)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatGetVecs1DC_FFTW"
305 /*
306    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
307      parallel layout determined by FFTW-1D
308 
309 */
310 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
311 {
312   PetscErrorCode ierr;
313   PetscMPIInt    size,rank;
314   MPI_Comm       comm=((PetscObject)A)->comm;
315   Mat_FFT        *fft = (Mat_FFT*)A->data;
316 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317   PetscInt       N=fft->N;
318   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320   ptrdiff_t      f_local_n1,f_local_1_end;
321   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322   ptrdiff_t      b_local_n1,b_local_1_end;
323   fftw_complex   *data_fin,*data_fout,*data_bout;
324 
325   PetscFunctionBegin;
326 #if !defined(PETSC_USE_COMPLEX)
327   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328 #endif
329   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331   if (size == 1){
332     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333   }
334   else {
335       if (ndim>1){
336         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337       else {
338           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340           if (fin) {
341             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344           }
345           if (fout) {
346             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349           }
350           if (bout) {
351             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354           }
355   }
356   if (fin){
357     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358   }
359   if (fout){
360     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361   }
362   if (bout){
363     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatGetVecs_FFTW"
373 /*
374    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
375      parallel layout determined by FFTW
376 
377    Collective on Mat
378 
379    Input Parameter:
380 .  mat - the matrix
381 
382    Output Parameter:
383 +   fin - (optional) input vector of forward FFTW
384 -   fout - (optional) output vector of forward FFTW
385 
386   Level: advanced
387 
388 .seealso: MatCreateFFTW()
389 */
390 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
391 {
392   PetscErrorCode ierr;
393   PetscMPIInt    size,rank;
394   MPI_Comm       comm=((PetscObject)A)->comm;
395   Mat_FFT        *fft = (Mat_FFT*)A->data;
396   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
397   PetscInt       N=fft->N, N1, n1,vsize;
398   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
399 
400   PetscFunctionBegin;
401   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
402   PetscValidType(A,1);
403 
404   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
405   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
406   if (size == 1){ /* sequential case */
407 #if defined(PETSC_USE_COMPLEX)
408     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
409     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
410 #else
411     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
412     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
413 #endif
414   } else {        /* mpi case */
415     ptrdiff_t      alloc_local,local_n0,local_0_start;
416     ptrdiff_t      local_n1,local_1_end;
417     fftw_complex   *data_fin,*data_fout;
418     double         *data_finr ;
419     ptrdiff_t      local_1_start,temp;
420 //    PetscInt ctr;
421 //    ptrdiff_t      ndim1,*pdim;
422 //    ndim1=(ptrdiff_t) ndim;
423 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
424 
425 //    for(ctr=0;ctr<ndim;ctr++)
426 //        {k
427 //           pdim[ctr] = dim[ctr];
428 //       }
429 
430 
431 
432     switch (ndim){
433     case 1:
434       /* Get local size */
435       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
436 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
437       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
438       if (fin) {
439         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
440         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
441         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
442       }
443       if (fout) {
444         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
445         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
446         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
447       }
448       break;
449     case 2:
450 #if !defined(PETSC_USE_COMPLEX)
451       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
452       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
453       if (fin) {
454         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
455         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
456         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
457         //printf("The code comes here with vector size %d\n",vsize);
458         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
459       }
460       if (fout) {
461         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
462         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
463         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
464       }
465       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
466 
467 #else
468       /* Get local size */
469      //printf("Hope this does not come here");
470       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
471       if (fin) {
472         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
473         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
474         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
475       }
476       if (fout) {
477         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
478         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
479         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
480       }
481      //printf("Hope this does not come here");
482 #endif
483       break;
484     case 3:
485       /* Get local size */
486 #if !defined(PETSC_USE_COMPLEX)
487       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
488       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
489       if (fin) {
490         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
491         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
492         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
493         //printf("The code comes here with vector size %d\n",vsize);
494         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
495       }
496       if (fout) {
497         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
498         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
499         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
500       }
501       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
502 
503 
504 #else
505       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
506 //      printf("The quantity n is %d",n);
507       if (fin) {
508         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
509         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
510         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
511       }
512       if (fout) {
513         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
514         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
515         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
516       }
517 #endif
518       break;
519     default:
520       /* Get local size */
521 #if !defined(PETSC_USE_COMPLEX)
522       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
523       printf("The value of temp is %ld\n",temp);
524       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
525       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
526       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
527       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
528       if (fin) {
529         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
530         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
531         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
532         //printf("The code comes here with vector size %d\n",vsize);
533         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
534       }
535       if (fout) {
536         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
538         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539       }
540 
541 #else
542       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
543 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
544 //      printf("The value of alloc local is %d",alloc_local);
545 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
546 //      for(i=0;i<ndim;i++)
547 //         {
548 //          pdim[i]=dim[i];printf("%d",pdim[i]);
549 //         }
550 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
551 //      printf("The quantity n is %d",n);
552       if (fin) {
553         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
554         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
555         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
556       }
557       if (fout) {
558         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
559         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
560         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
561       }
562 #endif
563       break;
564     }
565   }
566 //  if (fin){
567 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
568 //  }
569 //  if (fout){
570 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
571 //  }
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "InputTransformFFT"
577 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
578 {
579   PetscErrorCode ierr;
580   PetscFunctionBegin;
581   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
582   PetscFunctionReturn(0);
583 }
584 
585 /*
586       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
587   Input A, x, y
588   A - FFTW matrix
589   x - user data
590   Options Database Keys:
591 + -mat_fftw_plannerflags - set FFTW planner flags
592 
593    Level: intermediate
594 
595 */
596 
597 EXTERN_C_BEGIN
598 #undef __FUNCT__
599 #define __FUNCT__ "InputTransformFFT_FTTW"
600 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
601 {
602   PetscErrorCode ierr;
603   MPI_Comm       comm=((PetscObject)A)->comm;
604   Mat_FFT        *fft = (Mat_FFT*)A->data;
605   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
606   PetscInt       N=fft->N, N1, n1 ,NM;
607   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
608   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
609   PetscInt       i,j,k,rank,size,partial_dim;
610   ptrdiff_t      alloc_local,local_n0,local_0_start;
611   ptrdiff_t      local_n1,local_1_start,temp;
612   VecScatter     vecscat;
613   IS             list1,list2;
614 
615   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
616   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
617   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
618   printf("Local ownership starts at %d\n",low);
619 
620   if (size==1)
621     {
622 /*     switch (ndim){
623      case 1:
624           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
625           for (i=0;i<dim[0];i++)
626              {
627               indx1[i] = i;
628              }
629           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
630           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
631           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
632           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
633           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
634           ierr = ISDestroy(&list1);CHKERRQ(ierr);
635           ierr = PetscFree(indx1);CHKERRQ(ierr);
636      break;
637 
638      case 2:
639           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
640           for (i=0;i<dim[0];i++){
641              for (j=0;j<dim[1];j++){
642                 indx1[i*dim[1]+j] = i*dim[1] + j;
643              }
644           }
645           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
646           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
647           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
648           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
649           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
650           ierr = ISDestroy(&list1);CHKERRQ(ierr);
651           ierr = PetscFree(indx1);CHKERRQ(ierr);
652           break;
653      case 3:
654           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
655           for (i=0;i<dim[0];i++){
656              for (j=0;j<dim[1];j++){
657                 for (k=0;k<dim[2];k++){
658                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
659                 }
660              }
661           }
662           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
663           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
664           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
665           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
666           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
667           ierr = ISDestroy(&list1);CHKERRQ(ierr);
668           ierr = PetscFree(indx1);CHKERRQ(ierr);
669           break;
670      default:
671 */
672           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
673           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
674           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
676           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
677           ierr = ISDestroy(&list1);CHKERRQ(ierr);
678           //ierr = ISDestroy(list1);CHKERRQ(ierr);
679  //         break;
680   //    }
681     }
682 
683  else{
684 
685  switch (ndim){
686  case 1:
687 #if defined(PETSC_USE_COMPLEX)
688       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
689       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
690       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
691       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
692       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
693       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
694       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
697       ierr = ISDestroy(&list1);CHKERRQ(ierr);
698       ierr = ISDestroy(&list2);CHKERRQ(ierr);
699       break;
700 #else
701   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
702 #endif
703   break;
704  case 2:
705 #if defined(PETSC_USE_COMPLEX)
706       //PetscInt my_value;
707       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
708       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
709       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
710       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
711       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
712       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
713       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
714       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
716       ierr = ISDestroy(&list1);CHKERRQ(ierr);
717       ierr = ISDestroy(&list2);CHKERRQ(ierr);
718       break;
719 #else
720       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
721       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
722 
723       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
724       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
725       printf("Val local_0_start = %ld\n",local_0_start);
726 
727       if (dim[1]%2==0)
728         NM = dim[1]+2;
729       else
730         NM = dim[1]+1;
731 
732       for (i=0;i<local_n0;i++){
733          for (j=0;j<dim[1];j++){
734             tempindx = i*dim[1] + j;
735             tempindx1 = i*NM + j;
736             indx1[tempindx]=local_0_start*dim[1]+tempindx;
737             indx2[tempindx]=low+tempindx1;
738            // printf("Val tempindx1 = %d\n",tempindx1);
739   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
740   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
741   //          printf("-------------------------\n",indx2[tempindx],rank);
742         }
743      }
744 
745       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
746       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
747 
748       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
749       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
750       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
751       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
752       ierr = ISDestroy(&list1);CHKERRQ(ierr);
753       ierr = ISDestroy(&list2);CHKERRQ(ierr);
754       ierr = PetscFree(indx1);CHKERRQ(ierr);
755       ierr = PetscFree(indx2);CHKERRQ(ierr);
756       break;
757 #endif
758 
759  case 3:
760       #if defined(PETSC_USE_COMPLEX)
761       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
762       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
763       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
764       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
765       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
766       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
767       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
768       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
769       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
770       ierr = ISDestroy(&list1);CHKERRQ(ierr);
771       ierr = ISDestroy(&list2);CHKERRQ(ierr);
772       break;
773 #else
774       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
775       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
776 
777       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
778       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
779       printf("Val local_0_start = %ld\n",local_0_start);
780 
781       if (dim[2]%2==0)
782         NM = dim[2]+2;
783       else
784         NM = dim[2]+1;
785 
786       for (i=0;i<local_n0;i++){
787          for (j=0;j<dim[1];j++){
788             for (k=0;k<dim[2];k++){
789                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
790                tempindx1 = i*dim[1]*NM + j*NM + k;
791                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
792                indx2[tempindx]=low+tempindx1;
793             }
794            // printf("Val tempindx1 = %d\n",tempindx1);
795            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
796            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
797            // printf("-------------------------\n",indx2[tempindx],rank);
798         }
799      }
800 
801       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
802       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
803 
804       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
805       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
808       ierr = ISDestroy(&list1);CHKERRQ(ierr);
809       ierr = ISDestroy(&list2);CHKERRQ(ierr);
810       ierr = PetscFree(indx1);CHKERRQ(ierr);
811       ierr = PetscFree(indx2);CHKERRQ(ierr);
812       break;
813 #endif
814 
815  default:
816 #if defined(PETSC_USE_COMPLEX)
817       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
818       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
819       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
820       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
821       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
822       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
823       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
826       ierr = ISDestroy(&list1);CHKERRQ(ierr);
827       ierr = ISDestroy(&list2);CHKERRQ(ierr);
828 
829 
830 #else
831       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
832       printf("The value of temp is %ld\n",temp);
833       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
834       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
835       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
836       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
837 
838       partial_dim = fftw->partial_dim;
839       printf("The value of partial dim is %d\n",partial_dim);
840 
841       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
842       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
843       printf("Val local_0_start = %ld\n",local_0_start);
844 
845       if (dim[ndim-1]%2==0)
846         NM = 2;
847       else
848         NM = 1;
849 
850       j = low;
851       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
852          {
853           indx1[i] = local_0_start*partial_dim + i;
854           indx2[i] = j;
855           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
856           if (k%dim[ndim-1]==0)
857             { j+=NM;}
858           j++;
859          }
860       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
861       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
862       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
863       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
866       ierr = ISDestroy(&list1);CHKERRQ(ierr);
867       ierr = ISDestroy(&list2);CHKERRQ(ierr);
868       ierr = PetscFree(indx1);CHKERRQ(ierr);
869       ierr = PetscFree(indx2);CHKERRQ(ierr);
870       break;
871 #endif
872   }
873  }
874 
875  return 0;
876 }
877 EXTERN_C_END
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "OutputTransformFFT"
881 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
882 {
883   PetscErrorCode ierr;
884   PetscFunctionBegin;
885   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 /*
890       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
891   Input A, x, y
892   A - FFTW matrix
893   x - FFTW vector
894   y - PETSc vector that user can read
895   Options Database Keys:
896 + -mat_fftw_plannerflags - set FFTW planner flags
897 
898    Level: intermediate
899 
900 */
901 
902 EXTERN_C_BEGIN
903 #undef __FUNCT__
904 #define __FUNCT__ "OutputTransformFFT_FTTW"
905 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
906 {
907   PetscErrorCode ierr;
908   MPI_Comm       comm=((PetscObject)A)->comm;
909   Mat_FFT        *fft = (Mat_FFT*)A->data;
910   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
911   PetscInt       N=fft->N, N1, n1 ,NM;
912   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
913   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
914   PetscInt       i,j,k,rank,size,partial_dim;
915   ptrdiff_t      alloc_local,local_n0,local_0_start;
916   ptrdiff_t      local_n1,local_1_start,temp;
917   VecScatter     vecscat;
918   IS             list1,list2;
919 
920   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
921   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
922   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
923 
924   if (size==1){
925 /*
926     switch (ndim){
927     case 1:
928            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
929           for (i=0;i<dim[0];i++)
930              {
931               indx1[i] = i;
932              }
933           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
934           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
935           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
938           ierr = ISDestroy(&list1);CHKERRQ(ierr);
939           ierr = PetscFree(indx1);CHKERRQ(ierr);
940           break;
941 
942     case 2:
943          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
944           for (i=0;i<dim[0];i++){
945              for (j=0;j<dim[1];j++){
946                 indx1[i*dim[1]+j] = i*dim[1] + j;
947              }
948           }
949          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
950          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
951          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
954          ierr = ISDestroy(&list1);CHKERRQ(ierr);
955          ierr = PetscFree(indx1);CHKERRQ(ierr);
956          break;
957     case 3:
958          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
959          for (i=0;i<dim[0];i++){
960             for (j=0;j<dim[1];j++){
961                for (k=0;k<dim[2];k++){
962                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
963                }
964             }
965          }
966          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
967          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
968          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
971          ierr = ISDestroy(&list1);CHKERRQ(ierr);
972          ierr = PetscFree(indx1);CHKERRQ(ierr);
973          break;
974     default:
975 */
976          ierr = ISCreateStride(comm,N,0,1,&list1);
977          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
978          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
979          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
982          ierr = ISDestroy(&list1);CHKERRQ(ierr);
983   //       break;
984    // }
985   }
986   else{
987 
988  switch (ndim){
989  case 1:
990 #if defined(PETSC_USE_COMPLEX)
991       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
992       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
993       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
994       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
995       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
996       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
997       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1000       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1001       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1002       break;
1003 
1004 #else
1005       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1006 #endif
1007   break;
1008  case 2:
1009 #if defined(PETSC_USE_COMPLEX)
1010       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1011       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1012       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1013       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1014       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1017       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1018       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1019       break;
1020 #else
1021       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1022       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1023 
1024      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1025      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1026      printf("Val local_0_start = %ld\n",local_0_start);
1027 
1028      if (dim[1]%2==0)
1029       NM = dim[1]+2;
1030     else
1031       NM = dim[1]+1;
1032 
1033 
1034 
1035      for (i=0;i<local_n0;i++){
1036         for (j=0;j<dim[1];j++){
1037             tempindx = i*dim[1] + j;
1038             tempindx1 = i*NM + j;
1039             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1040             indx2[tempindx]=low+tempindx1;
1041        //     printf("Val tempindx1 = %d\n",tempindx1);
1042        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1043        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1044        //     printf("-------------------------\n",indx2[tempindx],rank);
1045         }
1046      }
1047 
1048      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1049      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1050 
1051      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1052      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057      ierr = PetscFree(indx1);CHKERRQ(ierr);
1058      ierr = PetscFree(indx2);CHKERRQ(ierr);
1059   break;
1060 #endif
1061  case 3:
1062 #if defined(PETSC_USE_COMPLEX)
1063       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1064       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1065       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1066       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1067       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1068       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1069       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1072       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1073       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1074       break;
1075 #else
1076 
1077       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1078       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1079 
1080      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1081      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1082      printf("Val local_0_start = %ld\n",local_0_start);
1083 
1084      if (dim[2]%2==0)
1085       NM = dim[2]+2;
1086     else
1087       NM = dim[2]+1;
1088 
1089      for (i=0;i<local_n0;i++){
1090         for (j=0;j<dim[1];j++){
1091            for (k=0;k<dim[2];k++){
1092               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1093               tempindx1 = i*dim[1]*NM + j*NM + k;
1094               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1095               indx2[tempindx]=low+tempindx1;
1096            }
1097         //    printf("Val tempindx1 = %d\n",tempindx1);
1098         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1099         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1100         //    printf("-------------------------\n",indx2[tempindx],rank);
1101         }
1102      }
1103 
1104      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1105      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1106 
1107      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1108      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1111      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1112      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1113      ierr = PetscFree(indx1);CHKERRQ(ierr);
1114      ierr = PetscFree(indx2);CHKERRQ(ierr);
1115   break;
1116 #endif
1117   default:
1118 #if defined(PETSC_USE_COMPLEX)
1119       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1120       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1121       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1122       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1123       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1124       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1125       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1128       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1129       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1130 #else
1131      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1132      printf("The value of temp is %ld\n",temp);
1133      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1134      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1135      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1136      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1137 
1138      partial_dim = fftw->partial_dim;
1139      printf("The value of partial dim is %d\n",partial_dim);
1140 
1141      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1142      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1143      printf("Val local_0_start = %ld\n",local_0_start);
1144 
1145      if (dim[ndim-1]%2==0)
1146        NM = 2;
1147      else
1148        NM = 1;
1149 
1150      j = low;
1151      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1152         {
1153          indx1[i] = local_0_start*partial_dim + i;
1154          indx2[i] = j;
1155          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1156          if (k%dim[ndim-1]==0)
1157            { j+=NM;}
1158          j++;
1159         }
1160      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1161      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1162 
1163       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1164 
1165 
1166      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1167      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1170      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1171      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1172      ierr = PetscFree(indx1);CHKERRQ(ierr);
1173      ierr = PetscFree(indx2);CHKERRQ(ierr);
1174 
1175      break;
1176 #endif
1177  }
1178  }
1179  return 0;
1180 }
1181 EXTERN_C_END
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatCreate_FFTW"
1186 /*
1187       MatCreate_FFTW - Creates a matrix object that provides FFT
1188   via the external package FFTW
1189   Options Database Keys:
1190 + -mat_fftw_plannerflags - set FFTW planner flags
1191 
1192    Level: intermediate
1193 
1194 */
1195 
1196 PetscErrorCode MatCreate_FFTW(Mat A)
1197 {
1198   PetscErrorCode ierr;
1199   MPI_Comm       comm=((PetscObject)A)->comm;
1200   Mat_FFT        *fft=(Mat_FFT*)A->data;
1201   Mat_FFTW       *fftw;
1202   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1203   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1204   PetscBool      flg;
1205   PetscInt       p_flag,partial_dim=1,ctr,N1;
1206   PetscMPIInt    size,rank;
1207   ptrdiff_t      *pdim, temp;
1208   ptrdiff_t      local_n1,local_1_start,local_1_end;
1209 
1210   PetscFunctionBegin;
1211   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1212   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1213   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1214 
1215   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1216   pdim[0] = dim[0];
1217   for(ctr=1;ctr<ndim;ctr++)
1218       {
1219           partial_dim *= dim[ctr];
1220           pdim[ctr] = dim[ctr];
1221       }
1222 
1223   if (size == 1) {
1224 #if defined(PETSC_USE_COMPLEX)
1225     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1226     n = N;
1227 #else
1228     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1229     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1230     n = tot_dim;
1231 #endif
1232 
1233   } else {
1234     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1235     switch (ndim){
1236     case 1:
1237 #if !defined(PETSC_USE_COMPLEX)
1238   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1239 #else
1240       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1241       n = (PetscInt)local_n0;
1242       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1243 #endif
1244 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1245       break;
1246     case 2:
1247 #if defined(PETSC_USE_COMPLEX)
1248       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1249       /*
1250        PetscMPIInt    rank;
1251        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1252        PetscSynchronizedFlush(comm);
1253        */
1254       n = (PetscInt)local_n0*dim[1];
1255       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1256 #else
1257       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1258       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1259       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1260 #endif
1261       break;
1262     case 3:
1263 //      printf("The value of alloc local is %d",alloc_local);
1264 #if defined(PETSC_USE_COMPLEX)
1265       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1266       n = (PetscInt)local_n0*dim[1]*dim[2];
1267       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1268 #else
1269       printf("The code comes here\n");
1270       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1271       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1272       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1273 #endif
1274       break;
1275     default:
1276 #if defined(PETSC_USE_COMPLEX)
1277       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1278 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1279 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1280       n = (PetscInt)local_n0*partial_dim;
1281 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1282       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1283 #else
1284       temp = pdim[ndim-1];
1285       pdim[ndim-1]= temp/2 + 1;
1286       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1287       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1288       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1289       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1290       pdim[ndim-1] = temp;
1291       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1292       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1293 #endif
1294       break;
1295     }
1296   }
1297   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1298   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1299   fft->data = (void*)fftw;
1300 
1301   fft->n           = n;
1302   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1303   fftw->partial_dim = partial_dim;
1304   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1305   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1306 
1307   fftw->p_forward  = 0;
1308   fftw->p_backward = 0;
1309   fftw->p_flag     = FFTW_ESTIMATE;
1310 
1311   if (size == 1){
1312     A->ops->mult          = MatMult_SeqFFTW;
1313     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1314   } else {
1315     A->ops->mult          = MatMult_MPIFFTW;
1316     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1317   }
1318   fft->matdestroy          = MatDestroy_FFTW;
1319 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1320   A->ops->getvecs       = MatGetVecs_FFTW;
1321   A->assembled          = PETSC_TRUE;
1322   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1323   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1324 
1325   /* get runtime options */
1326   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1327     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1328     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1329   PetscOptionsEnd();
1330   PetscFunctionReturn(0);
1331 }
1332 EXTERN_C_END
1333 
1334 
1335 
1336 
1337