xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision e5d7f247475a58b54eed84ab1606c36769b8e4b7)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private context of a MATFFTW matrix: cached FFTW plans plus the array
   pointers the plans were created with (FFTW plans are bound to the arrays
   used at planning time; see the new-array execute note below). */
typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw;   /* dimensions in FFTW's ptrdiff_t form, used by the n-D MPI planners */
  PetscInt   partial_dim;            /* product of all but the first dimension (per usage in InputTransformFFT_FFTW) */
  fftw_plan   p_forward,p_backward;  /* lazily created in MatMult/MatMultTranspose; NULL until first use */
  unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
20 
/* Forward declarations of the FFTW implementations of the Mat/Vec operations
   defined later in this file (installed into the ops tables elsewhere). */
extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40 #if !defined(PETSC_USE_COMPLEX)
41 
42   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
43 #endif
44   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
45   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
46   if (!fftw->p_forward){ /* create a plan, then excute it */
47     switch (ndim){
48     case 1:
49       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
50       break;
51     case 2:
52       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
53       break;
54     case 3:
55       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56       break;
57     default:
58       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
59       break;
60     }
61     fftw->finarray  = x_array;
62     fftw->foutarray = y_array;
63     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
64                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
65     fftw_execute(fftw->p_forward);
66   } else { /* use existing plan */
67     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
68       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
69     } else {
70       fftw_execute(fftw->p_forward);
71     }
72   }
73   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
74   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
80 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
81 {
82   PetscErrorCode ierr;
83   Mat_FFT        *fft = (Mat_FFT*)A->data;
84   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
85   PetscScalar    *x_array,*y_array;
86   PetscInt       ndim=fft->ndim,*dim=fft->dim;
87 
88   PetscFunctionBegin;
89 #if !defined(PETSC_USE_COMPLEX)
90   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
91 #endif
92   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
93   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
94   if (!fftw->p_backward){ /* create a plan, then excute it */
95     switch (ndim){
96     case 1:
97       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
98       break;
99     case 2:
100       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
101       break;
102     case 3:
103       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
104       break;
105     default:
106       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
107       break;
108     }
109     fftw->binarray  = x_array;
110     fftw->boutarray = y_array;
111     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
112   } else { /* use existing plan */
113     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
114       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
115     } else {
116       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
117     }
118   }
119   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
120   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatMult_MPIFFTW"
126 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
127 {
128   PetscErrorCode ierr;
129   Mat_FFT        *fft  = (Mat_FFT*)A->data;
130   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
131   PetscScalar    *x_array,*y_array;
132   PetscInt       ndim=fft->ndim,*dim=fft->dim;
133   MPI_Comm       comm=((PetscObject)A)->comm;
134 // PetscInt ctr;
135 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
136 //  ndim1=(ptrdiff_t) ndim;
137 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
138 
139 //  for(ctr=0;ctr<ndim;ctr++)
140 //     {
141 //      pdim[ctr] = dim[ctr];
142 //     }
143 
144   PetscFunctionBegin;
145 //#if !defined(PETSC_USE_COMPLEX)
146 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
147 //#endif
148 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
149 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
150 
151   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
152   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
153   if (!fftw->p_forward){ /* create a plan, then excute it */
154     switch (ndim){
155     case 1:
156 #if defined(PETSC_USE_COMPLEX)
157       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
158 #endif
159       break;
160     case 2:
161 #if defined(PETSC_USE_COMPLEX)
162       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
163 #else
164       printf("The code comes here \n");
165       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
166 #endif
167       break;
168     case 3:
169 #if defined(PETSC_USE_COMPLEX)
170       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
171 #else
172       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
173 #endif
174       break;
175     default:
176 #if defined(PETSC_USE_COMPLEX)
177       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
178 #else
179       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
180 #endif
181  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182       break;
183     }
184     fftw->finarray  = x_array;
185     fftw->foutarray = y_array;
186     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
187                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
188     fftw_execute(fftw->p_forward);
189   } else { /* use existing plan */
190     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
191       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
192     } else {
193       fftw_execute(fftw->p_forward);
194     }
195   }
196   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
197   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
203 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
204 {
205   PetscErrorCode ierr;
206   Mat_FFT        *fft  = (Mat_FFT*)A->data;
207   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
208   PetscScalar    *x_array,*y_array;
209   PetscInt       ndim=fft->ndim,*dim=fft->dim;
210   MPI_Comm       comm=((PetscObject)A)->comm;
211 //  PetscInt       ctr;
212 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
213 //  ndim1=(ptrdiff_t) ndim;
214 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
215 
216 //  for(ctr=0;ctr<ndim;ctr++)
217 //     {
218 //      pdim[ctr] = dim[ctr];
219 //     }
220 
221   PetscFunctionBegin;
222 //#if !defined(PETSC_USE_COMPLEX)
223 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
224 //#endif
225 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
226 // should pdim be a member of Mat_FFTW?
227 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
228 
229   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
230   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
231   if (!fftw->p_backward){ /* create a plan, then excute it */
232     switch (ndim){
233     case 1:
234 #if defined(PETSC_USE_COMPLEX)
235       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
236 #endif
237       break;
238     case 2:
239 #if defined(PETSC_USE_COMPLEX)
240       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
241 #else
242       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
243 #endif
244       break;
245     case 3:
246 #if defined(PETSC_USE_COMPLEX)
247       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
248 #else
249       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
250 #endif
251       break;
252     default:
253 #if defined(PETSC_USE_COMPLEX)
254       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
255 #else
256       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
257 #endif
258 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
259       break;
260     }
261     fftw->binarray  = x_array;
262     fftw->boutarray = y_array;
263     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264   } else { /* use existing plan */
265     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
266       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
267     } else {
268       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
269     }
270   }
271   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
272   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatDestroy_FFTW"
278 PetscErrorCode MatDestroy_FFTW(Mat A)
279 {
280   Mat_FFT        *fft = (Mat_FFT*)A->data;
281   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
282   PetscErrorCode ierr;
283 
284   PetscFunctionBegin;
285 #if !defined(PETSC_USE_COMPLEX)
286   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
287 #endif
288   fftw_destroy_plan(fftw->p_forward);
289   fftw_destroy_plan(fftw->p_backward);
290   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
291   ierr = PetscFree(fft->data);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
296 #undef __FUNCT__
297 #define __FUNCT__ "VecDestroy_MPIFFTW"
298 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
299 {
300   PetscErrorCode  ierr;
301   PetscScalar     *array;
302 
303   PetscFunctionBegin;
304 #if !defined(PETSC_USE_COMPLEX)
305   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
306 #endif
307   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
308   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
309   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
310   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatGetVecs1DC_FFTW"
316 /*
317    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
318      parallel layout determined by FFTW-1D
319 
320 */
321 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
322 {
323   PetscErrorCode ierr;
324   PetscMPIInt    size,rank;
325   MPI_Comm       comm=((PetscObject)A)->comm;
326   Mat_FFT        *fft = (Mat_FFT*)A->data;
327 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
328   PetscInt       N=fft->N;
329   PetscInt       ndim=fft->ndim,*dim=fft->dim;
330   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
331   ptrdiff_t      f_local_n1,f_local_1_end;
332   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
333   ptrdiff_t      b_local_n1,b_local_1_end;
334   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
335 
336   PetscFunctionBegin;
337 #if !defined(PETSC_USE_COMPLEX)
338   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
339 #endif
340   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
341   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
342   if (size == 1){
343     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
344   }
345   else {
346       if (ndim>1){
347         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
348       else {
349           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
350           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
351           if (fin) {
352             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
353             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
354             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
355           }
356           if (fout) {
357             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
358             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
359             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
360           }
361           if (bin) {
362             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
363             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
364             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
365           }
366           if (bout) {
367             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
368             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
369             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
370           }
371   }
372   if (fin){
373     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
374   }
375   if (fout){
376     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
377   }
378   if (bin){
379     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
380   }
381   if (bout){
382     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
383   }
384   PetscFunctionReturn(0);
385 }
386 
387 
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatGetVecs_FFTW"
392 /*
393    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
394      parallel layout determined by FFTW
395 
396    Collective on Mat
397 
398    Input Parameter:
399 .  mat - the matrix
400 
401    Output Parameter:
402 +   fin - (optional) input vector of forward FFTW
403 -   fout - (optional) output vector of forward FFTW
404 
405   Level: advanced
406 
407 .seealso: MatCreateFFTW()
408 */
409 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
410 {
411   PetscErrorCode ierr;
412   PetscMPIInt    size,rank;
413   MPI_Comm       comm=((PetscObject)A)->comm;
414   Mat_FFT        *fft = (Mat_FFT*)A->data;
415   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
416   PetscInt       N=fft->N, N1, n1,vsize;
417 
418   PetscFunctionBegin;
419 //#if !defined(PETSC_USE_COMPLEX)
420 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
421 //#endif
422   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
423   PetscValidType(A,1);
424 
425   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
426   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
427   if (size == 1){ /* sequential case */
428 #if defined(PETSC_USE_COMPLEX)
429     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
430     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
431 #else
432     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
433     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*(N/2+1),fout);CHKERRQ(ierr);}
434 #endif
435     printf("The code successfully comes at the end of the routine with one processor\n");
436   } else {        /* mpi case */
437     ptrdiff_t      alloc_local,local_n0,local_0_start;
438     ptrdiff_t      local_n1,local_1_end;
439     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
440     fftw_complex   *data_fin,*data_fout;
441     double         *data_finr ;
442     ptrdiff_t      local_1_start,temp;
443 //    PetscInt ctr;
444 //    ptrdiff_t      ndim1,*pdim;
445 //    ndim1=(ptrdiff_t) ndim;
446 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
447 
448 //    for(ctr=0;ctr<ndim;ctr++)
449 //        {k
450 //           pdim[ctr] = dim[ctr];
451 //       }
452 
453 
454 
455     switch (ndim){
456     case 1:
457       /* Get local size */
458       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
459 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
460       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
461       if (fin) {
462         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
463         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
464         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
465       }
466       if (fout) {
467         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
468         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
469         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
470       }
471       break;
472     case 2:
473 #if !defined(PETSC_USE_COMPLEX)
474       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
475       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
476       if (fin) {
477         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
478         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
479         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
480         //printf("The code comes here with vector size %d\n",vsize);
481         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
482       }
483       if (fout) {
484         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
485         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
486         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
487       }
488       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
489 
490 #else
491       /* Get local size */
492      printf("Hope this does not come here");
493       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
494       if (fin) {
495         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
496         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
497         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
498       }
499       if (fout) {
500         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
501         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
502         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
503       }
504      printf("Hope this does not come here");
505 #endif
506       break;
507     case 3:
508       /* Get local size */
509 #if !defined(PETSC_USE_COMPLEX)
510       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
511       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
512       if (fin) {
513         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
514         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
515         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
516         //printf("The code comes here with vector size %d\n",vsize);
517         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
518       }
519       if (fout) {
520         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
522         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
523       }
524       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
525 
526 
527 #else
528       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
529 //      printf("The quantity n is %d",n);
530       if (fin) {
531         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
532         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
533         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
534       }
535       if (fout) {
536         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
538         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539       }
540 #endif
541       break;
542     default:
543       /* Get local size */
544 #if !defined(PETSC_USE_COMPLEX)
545       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
546       printf("The value of temp is %ld\n",temp);
547       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
548       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
549       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
550       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
551       if (fin) {
552         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
553         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
554         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
555         //printf("The code comes here with vector size %d\n",vsize);
556         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
557       }
558       if (fout) {
559         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
560         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
561         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
562       }
563 
564 #else
565       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
566 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
567 //      printf("The value of alloc local is %d",alloc_local);
568 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
569 //      for(i=0;i<ndim;i++)
570 //         {
571 //          pdim[i]=dim[i];printf("%d",pdim[i]);
572 //         }
573 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
574 //      printf("The quantity n is %d",n);
575       if (fin) {
576         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
578         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
579       }
580       if (fout) {
581         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
582         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
583         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
584       }
585 #endif
586       break;
587     }
588   }
589 //  if (fin){
590 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
591 //  }
592 //  if (fout){
593 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
594 //  }
595   PetscFunctionReturn(0);
596 }
597 
#undef __FUNCT__
#define __FUNCT__ "InputTransformFFT"
/*
    InputTransformFFT - Dispatches to the matrix-type-specific
    "InputTransformFFT_C" method (if the type composed one) that copies the
    user data x into the FFTW-layout vector y. A no-op for types that do not
    provide the method (PetscTryMethod semantics).
*/
PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
607 
608 /*
609       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
610   Input A, x, y
611   A - FFTW matrix
612   x - user data
613   Options Database Keys:
614 + -mat_fftw_plannerflags - set FFTW planner flags
615 
616    Level: intermediate
617 
618 */
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "InputTransformFFT_FTTW"
623 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
624 {
625   PetscErrorCode ierr;
626   MPI_Comm       comm=((PetscObject)A)->comm;
627   Mat_FFT        *fft = (Mat_FFT*)A->data;
628   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
629   PetscInt       N=fft->N, N1, n1 ,NM;
630   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
631   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
632   PetscInt       i,j,k,rank,size,partial_dim;
633   ptrdiff_t      alloc_local,local_n0,local_0_start;
634   ptrdiff_t      local_n1,local_1_start,temp;
635   VecScatter     vecscat;
636   IS             list1,list2;
637 
638   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
639   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
640   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
641   printf("Local ownership starts at %d\n",low);
642 
643  switch (ndim){
644  case 1:
645   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
646   break;
647  case 2:
648       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
649       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
650 
651       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
652       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
653       printf("Val local_0_start = %ld\n",local_0_start);
654 
655       if (dim[1]%2==0)
656         NM = dim[1]+2;
657       else
658         NM = dim[1]+1;
659 
660       for (i=0;i<local_n0;i++){
661          for (j=0;j<dim[1];j++){
662             tempindx = i*dim[1] + j;
663             tempindx1 = i*NM + j;
664             indx1[tempindx]=local_0_start*dim[1]+tempindx;
665             indx2[tempindx]=low+tempindx1;
666            // printf("Val tempindx1 = %d\n",tempindx1);
667   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
668   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
669   //          printf("-------------------------\n",indx2[tempindx],rank);
670         }
671      }
672 
673       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
674       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
675 
676       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
677       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
678       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
679       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
680       break;
681 
682  case 3:
683       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
684       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
685 
686       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
687       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
688       printf("Val local_0_start = %ld\n",local_0_start);
689 
690       if (dim[2]%2==0)
691         NM = dim[2]+2;
692       else
693         NM = dim[2]+1;
694 
695       for (i=0;i<local_n0;i++){
696          for (j=0;j<dim[1];j++){
697             for (k=0;k<dim[2];k++){
698                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
699                tempindx1 = i*dim[1]*NM + j*NM + k;
700                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
701                indx2[tempindx]=low+tempindx1;
702             }
703            // printf("Val tempindx1 = %d\n",tempindx1);
704            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
705            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
706            // printf("-------------------------\n",indx2[tempindx],rank);
707         }
708      }
709 
710       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
711       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
712 
713       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
714       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
717       break;
718 
719  default:
720       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
721       printf("The value of temp is %ld\n",temp);
722       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
723       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
724       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
725       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
726 
727       partial_dim = fftw->partial_dim;
728       printf("The value of partial dim is %d\n",partial_dim);
729 
730       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
731       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
732       printf("Val local_0_start = %ld\n",local_0_start);
733 
734       if (dim[ndim-1]%2==0)
735         NM = dim[ndim-1]+2;
736       else
737         NM = dim[ndim-1]+1;
738 
739 
740 
741   break;
742  }
743 
744  return 0;
745 }
746 EXTERN_C_END
747 
#undef __FUNCT__
#define __FUNCT__ "OutputTransformFFT"
/*
   OutputTransformFFT - Converts the raw output of an FFT into a form the
   user can read, by invoking the "OutputTransformFFT_C" method composed on
   the matrix (a no-op if the matrix type has not composed one).

   Input Parameters:
+  A - the FFT matrix
.  x - vector holding the raw transform output
-  y - vector that receives the user-readable result
*/
PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* PetscTryMethod silently succeeds when the method is not composed */
  ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
757 
758 /*
759       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
760   Input A, x, y
761   A - FFTW matrix
762   x - FFTW vector
763   y - PETSc vector that user can read
764   Options Database Keys:
765 + -mat_fftw_plannerflags - set FFTW planner flags
766 
767    Level: intermediate
768 
769 */
770 
771 EXTERN_C_BEGIN
772 #undef __FUNCT__
773 #define __FUNCT__ "OutputTransformFFT_FTTW"
774 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
775 {
776   PetscErrorCode ierr;
777   MPI_Comm       comm=((PetscObject)A)->comm;
778   Mat_FFT        *fft = (Mat_FFT*)A->data;
779   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
780   PetscInt       N=fft->N, N1, n1 ,NM;
781   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
782   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
783   PetscInt       i,j,k,rank,size;
784   ptrdiff_t      alloc_local,local_n0,local_0_start;
785   ptrdiff_t      local_n1,local_1_start;
786   VecScatter     vecscat;
787   IS             list1,list2;
788 
789   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
790   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
791   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
792   printf("Local ownership starts at %d\n",low);
793 
794  switch (ndim){
795  case 1:
796   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
797   break;
798  case 2:
799       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
800       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
801 
802      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
803      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
804      printf("Val local_0_start = %ld\n",local_0_start);
805 
806      if (dim[1]%2==0)
807       NM = dim[1]+2;
808     else
809       NM = dim[1]+1;
810 
811 
812 
813      for (i=0;i<local_n0;i++){
814         for (j=0;j<dim[1];j++){
815             tempindx = i*dim[1] + j;
816             tempindx1 = i*NM + j;
817             indx1[tempindx]=local_0_start*dim[1]+tempindx;
818             indx2[tempindx]=low+tempindx1;
819        //     printf("Val tempindx1 = %d\n",tempindx1);
820        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
821        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
822        //     printf("-------------------------\n",indx2[tempindx],rank);
823         }
824      }
825 
826      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
827      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
828 
829      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
830      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
833   break;
834 
835  case 3:
836       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
837       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
838 
839      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
840      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
841      printf("Val local_0_start = %ld\n",local_0_start);
842 
843      if (dim[2]%2==0)
844       NM = dim[2]+2;
845     else
846       NM = dim[2]+1;
847 
848      for (i=0;i<local_n0;i++){
849         for (j=0;j<dim[1];j++){
850            for (k=0;k<dim[2];k++){
851               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
852               tempindx1 = i*dim[1]*NM + j*NM + k;
853               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
854               indx2[tempindx]=low+tempindx1;
855            }
856         //    printf("Val tempindx1 = %d\n",tempindx1);
857         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
858         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
859         //    printf("-------------------------\n",indx2[tempindx],rank);
860         }
861      }
862 
863      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
864      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
865 
866      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
867      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
870   break;
871 
872  default:
873   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
874   break;
875  }
876  return 0;
877 }
878 EXTERN_C_END
879 
880 EXTERN_C_BEGIN
881 #undef __FUNCT__
882 #define __FUNCT__ "MatCreate_FFTW"
883 /*
884       MatCreate_FFTW - Creates a matrix object that provides FFT
885   via the external package FFTW
886   Options Database Keys:
887 + -mat_fftw_plannerflags - set FFTW planner flags
888 
889    Level: intermediate
890 
891 */
892 
893 PetscErrorCode MatCreate_FFTW(Mat A)
894 {
895   PetscErrorCode ierr;
896   MPI_Comm       comm=((PetscObject)A)->comm;
897   Mat_FFT        *fft=(Mat_FFT*)A->data;
898   Mat_FFTW       *fftw;
899   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
900   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
901   PetscBool      flg;
902   PetscInt       p_flag,partial_dim=1,ctr,N1;
903   PetscMPIInt    size,rank;
904   ptrdiff_t      *pdim, temp;
905   ptrdiff_t      local_n1,local_1_start;
906 
907   PetscFunctionBegin;
908 //#if !defined(PETSC_USE_COMPLEX)
909 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
910 //#endif
911 
912   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
913   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
914   ierr = MPI_Barrier(PETSC_COMM_WORLD);
915 
916   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
917   pdim[0] = dim[0];
918   for(ctr=1;ctr<ndim;ctr++)
919       {
920           partial_dim *= dim[ctr];
921           pdim[ctr] = dim[ctr];
922       }
923 //#if !defined(PETSC_USE_COMPLEX)
924 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
925 //#endif
926 
927 //  printf("partial dimension is %d",partial_dim);
928   if (size == 1) {
929     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
930     n = N;
931   } else {
932     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
933     switch (ndim){
934     case 1:
935 #if !defined(PETSC_USE_COMPLEX)
936   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
937 #else
938       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
939       n = (PetscInt)local_n0;
940       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
941 #endif
942 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
943       break;
944     case 2:
945 #if defined(PETSC_USE_COMPLEX)
946       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
947       /*
948        PetscMPIInt    rank;
949        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
950        PetscSynchronizedFlush(comm);
951        */
952       n = (PetscInt)local_n0*dim[1];
953       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
954 #else
955       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
956       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
957       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
958 #endif
959       break;
960     case 3:
961 //      printf("The value of alloc local is %d",alloc_local);
962 #if defined(PETSC_USE_COMPLEX)
963       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
964       n = (PetscInt)local_n0*dim[1]*dim[2];
965       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
966 #else
967       printf("The code comes here\n");
968       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
969       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
970       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
971 #endif
972       break;
973     default:
974 #if defined(PETSC_USE_COMPLEX)
975       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
976 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
977 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
978       n = (PetscInt)local_n0*partial_dim;
979 //      printf("New partial dimension is %d %d %d",n,N,ndim);
980       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
981 #else
982       temp = pdim[ndim-1];
983       pdim[ndim-1]= temp/2 + 1;
984       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
985       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
986       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
987       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
988       pdim[ndim-1] = temp;
989       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
990       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
991 #endif
992       break;
993     }
994   }
995   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
996   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
997   fft->data = (void*)fftw;
998 
999   fft->n           = n;
1000   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1001   fftw->partial_dim = partial_dim;
1002   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1003   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1004 
1005   fftw->p_forward  = 0;
1006   fftw->p_backward = 0;
1007   fftw->p_flag     = FFTW_ESTIMATE;
1008 
1009   if (size == 1){
1010     A->ops->mult          = MatMult_SeqFFTW;
1011     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1012   } else {
1013     A->ops->mult          = MatMult_MPIFFTW;
1014     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1015   }
1016   fft->matdestroy          = MatDestroy_FFTW;
1017   A->ops->getvecs       = MatGetVecs_FFTW;
1018   A->assembled          = PETSC_TRUE;
1019 #if !defined(PETSC_USE_COMPLEX)
1020   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1021   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1022 #endif
1023 
1024   /* get runtime options */
1025   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1026     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1027     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1028   PetscOptionsEnd();
1029   PetscFunctionReturn(0);
1030 }
1031 EXTERN_C_END
1032 
1033 
1034 
1035 
1036