xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 51d1eed7acd58e08ba34a61d6505cc587917f279)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 extern PetscErrorCode InputTransformFFT_FFTW(Mat,Vec,Vec);
28 extern PetscErrorCode InputTransformFFT(Mat,Vec,Vec);
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMult_SeqFFTW"
32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33 {
34   PetscErrorCode ierr;
35   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37   PetscScalar    *x_array,*y_array;
38   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39 
40   PetscFunctionBegin;
41 #if !defined(PETSC_USE_COMPLEX)
42 
43   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
44 #endif
45   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
46   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
47   if (!fftw->p_forward){ /* create a plan, then excute it */
48     switch (ndim){
49     case 1:
50       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51       break;
52     case 2:
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54       break;
55     case 3:
56       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
57       break;
58     default:
59       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
60       break;
61     }
62     fftw->finarray  = x_array;
63     fftw->foutarray = y_array;
64     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
65                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
66     fftw_execute(fftw->p_forward);
67   } else { /* use existing plan */
68     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
69       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
70     } else {
71       fftw_execute(fftw->p_forward);
72     }
73   }
74   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
75   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
81 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
82 {
83   PetscErrorCode ierr;
84   Mat_FFT        *fft = (Mat_FFT*)A->data;
85   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
86   PetscScalar    *x_array,*y_array;
87   PetscInt       ndim=fft->ndim,*dim=fft->dim;
88 
89   PetscFunctionBegin;
90 #if !defined(PETSC_USE_COMPLEX)
91   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
92 #endif
93   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
94   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
95   if (!fftw->p_backward){ /* create a plan, then excute it */
96     switch (ndim){
97     case 1:
98       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
99       break;
100     case 2:
101       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
102       break;
103     case 3:
104       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
105       break;
106     default:
107       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108       break;
109     }
110     fftw->binarray  = x_array;
111     fftw->boutarray = y_array;
112     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
113   } else { /* use existing plan */
114     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
115       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
116     } else {
117       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
118     }
119   }
120   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
121   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 #undef __FUNCT__
126 #define __FUNCT__ "MatMult_MPIFFTW"
127 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
128 {
129   PetscErrorCode ierr;
130   Mat_FFT        *fft  = (Mat_FFT*)A->data;
131   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
132   PetscScalar    *x_array,*y_array;
133   PetscInt       ndim=fft->ndim,*dim=fft->dim;
134   MPI_Comm       comm=((PetscObject)A)->comm;
135 // PetscInt ctr;
136 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
137 //  ndim1=(ptrdiff_t) ndim;
138 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
139 
140 //  for(ctr=0;ctr<ndim;ctr++)
141 //     {
142 //      pdim[ctr] = dim[ctr];
143 //     }
144 
145   PetscFunctionBegin;
146 //#if !defined(PETSC_USE_COMPLEX)
147 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
148 //#endif
149 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
150 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
151 
152   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
153   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
154   if (!fftw->p_forward){ /* create a plan, then excute it */
155     switch (ndim){
156     case 1:
157 #if defined(PETSC_USE_COMPLEX)
158       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
159 #endif
160       break;
161     case 2:
162 #if defined(PETSC_USE_COMPLEX)
163       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
164 #else
165       printf("The code comes here \n");
166       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
167 #endif
168       break;
169     case 3:
170 #if defined(PETSC_USE_COMPLEX)
171       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
172 #else
173       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
174 #endif
175       break;
176     default:
177 #if defined(PETSC_USE_COMPLEX)
178       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
179 #else
180       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
181 #endif
182  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183       break;
184     }
185     fftw->finarray  = x_array;
186     fftw->foutarray = y_array;
187     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
188                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
189     fftw_execute(fftw->p_forward);
190   } else { /* use existing plan */
191     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
192       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
193     } else {
194       fftw_execute(fftw->p_forward);
195     }
196   }
197   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
198   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
204 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
205 {
206   PetscErrorCode ierr;
207   Mat_FFT        *fft  = (Mat_FFT*)A->data;
208   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
209   PetscScalar    *x_array,*y_array;
210   PetscInt       ndim=fft->ndim,*dim=fft->dim;
211   MPI_Comm       comm=((PetscObject)A)->comm;
212 //  PetscInt       ctr;
213 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
214 //  ndim1=(ptrdiff_t) ndim;
215 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
216 
217 //  for(ctr=0;ctr<ndim;ctr++)
218 //     {
219 //      pdim[ctr] = dim[ctr];
220 //     }
221 
222   PetscFunctionBegin;
223 //#if !defined(PETSC_USE_COMPLEX)
224 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
225 //#endif
226 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
227 // should pdim be a member of Mat_FFTW?
228 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
229 
230   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
231   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
232   if (!fftw->p_backward){ /* create a plan, then excute it */
233     switch (ndim){
234     case 1:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #endif
238       break;
239     case 2:
240 #if defined(PETSC_USE_COMPLEX)
241       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
242 #else
243       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
244 #endif
245       break;
246     case 3:
247 #if defined(PETSC_USE_COMPLEX)
248       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
249 #else
250       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
251 #endif
252       break;
253     default:
254 #if defined(PETSC_USE_COMPLEX)
255       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
256 #else
257       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
258 #endif
259 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
260       break;
261     }
262     fftw->binarray  = x_array;
263     fftw->boutarray = y_array;
264     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265   } else { /* use existing plan */
266     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
267       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
268     } else {
269       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
270     }
271   }
272   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
273   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatDestroy_FFTW"
279 PetscErrorCode MatDestroy_FFTW(Mat A)
280 {
281   Mat_FFT        *fft = (Mat_FFT*)A->data;
282   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286 #if !defined(PETSC_USE_COMPLEX)
287   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
288 #endif
289   fftw_destroy_plan(fftw->p_forward);
290   fftw_destroy_plan(fftw->p_backward);
291   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
292   ierr = PetscFree(fft->data);CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
297 #undef __FUNCT__
298 #define __FUNCT__ "VecDestroy_MPIFFTW"
299 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
300 {
301   PetscErrorCode  ierr;
302   PetscScalar     *array;
303 
304   PetscFunctionBegin;
305 #if !defined(PETSC_USE_COMPLEX)
306   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
307 #endif
308   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
309   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
310   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
311   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatGetVecs1DC_FFTW"
317 /*
318    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
319      parallel layout determined by FFTW-1D
320 
321 */
322 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
323 {
324   PetscErrorCode ierr;
325   PetscMPIInt    size,rank;
326   MPI_Comm       comm=((PetscObject)A)->comm;
327   Mat_FFT        *fft = (Mat_FFT*)A->data;
328 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
329   PetscInt       N=fft->N;
330   PetscInt       ndim=fft->ndim,*dim=fft->dim;
331   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
332   ptrdiff_t      f_local_n1,f_local_1_end;
333   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
334   ptrdiff_t      b_local_n1,b_local_1_end;
335   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
336 
337   PetscFunctionBegin;
338 #if !defined(PETSC_USE_COMPLEX)
339   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
340 #endif
341   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
342   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
343   if (size == 1){
344     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
345   }
346   else {
347       if (ndim>1){
348         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
349       else {
350           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
351           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
352           if (fin) {
353             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
354             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
355             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
356           }
357           if (fout) {
358             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
359             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
360             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
361           }
362           if (bin) {
363             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
364             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
365             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
366           }
367           if (bout) {
368             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
369             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
370             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
371           }
372   }
373   if (fin){
374     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
375   }
376   if (fout){
377     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
378   }
379   if (bin){
380     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
381   }
382   if (bout){
383     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
384   }
385   PetscFunctionReturn(0);
386 }
387 
388 
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatGetVecs_FFTW"
393 /*
394    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
395      parallel layout determined by FFTW
396 
397    Collective on Mat
398 
399    Input Parameter:
400 .  mat - the matrix
401 
402    Output Parameter:
403 +   fin - (optional) input vector of forward FFTW
404 -   fout - (optional) output vector of forward FFTW
405 
406   Level: advanced
407 
408 .seealso: MatCreateFFTW()
409 */
410 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
411 {
412   PetscErrorCode ierr;
413   PetscMPIInt    size,rank;
414   MPI_Comm       comm=((PetscObject)A)->comm;
415   Mat_FFT        *fft = (Mat_FFT*)A->data;
416   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
417   PetscInt       N=fft->N, N1, n1,vsize;
418 
419   PetscFunctionBegin;
420 //#if !defined(PETSC_USE_COMPLEX)
421 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
422 //#endif
423   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
424   PetscValidType(A,1);
425 
426   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
427   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
428   if (size == 1){ /* sequential case */
429     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
430     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
431     printf("The code successfully comes at the end of the routine with one processor\n");
432   } else {        /* mpi case */
433     ptrdiff_t      alloc_local,local_n0,local_0_start;
434     ptrdiff_t      local_n1,local_1_end;
435     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
436     fftw_complex   *data_fin,*data_fout;
437     double         *data_finr ;
438     ptrdiff_t local_1_start;
439 //    PetscInt ctr;
440 //    ptrdiff_t      ndim1,*pdim;
441 //    ndim1=(ptrdiff_t) ndim;
442 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
443 
444 //    for(ctr=0;ctr<ndim;ctr++)
445 //        {k
446 //           pdim[ctr] = dim[ctr];
447 //       }
448 
449 
450 
451     switch (ndim){
452     case 1:
453       /* Get local size */
454       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
455 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
456       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
457       if (fin) {
458         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
459         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
460         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
461       }
462       if (fout) {
463         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
464         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
465         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
466       }
467       break;
468     case 2:
469 #if !defined(PETSC_USE_COMPLEX)
470       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
471       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
472       if (fin) {
473         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
474         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
475         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
476         //printf("The code comes here with vector size %d\n",vsize);
477         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
478       }
479       if (fout) {
480         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
482         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
483       }
484       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
485 
486 #else
487       /* Get local size */
488      printf("Hope this does not come here");
489       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
490       if (fin) {
491         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
493         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
494       }
495       if (fout) {
496         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
498         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
499       }
500      printf("Hope this does not come here");
501 #endif
502       break;
503     case 3:
504       /* Get local size */
505 #if !defined(PETSC_USE_COMPLEX)
506       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
507       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
508       if (fin) {
509         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
510         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
511         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
512         //printf("The code comes here with vector size %d\n",vsize);
513         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
514       }
515       if (fout) {
516         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
518         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
519       }
520       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
521 
522 
523 #else
524       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
525 //      printf("The quantity n is %d",n);
526       if (fin) {
527         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
529         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
530       }
531       if (fout) {
532         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
534         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
535       }
536 #endif
537       break;
538     default:
539       /* Get local size */
540 #if !defined(PETSC_USE_COMPLEX)
541   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
542 #else
543       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
544 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
545 //      printf("The value of alloc local is %d",alloc_local);
546 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
547 //      for(i=0;i<ndim;i++)
548 //         {
549 //          pdim[i]=dim[i];printf("%d",pdim[i]);
550 //         }
551 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
552 //      printf("The quantity n is %d",n);
553       if (fin) {
554         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
555         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
556         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
557       }
558       if (fout) {
559         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
560         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
561         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
562       }
563 #endif
564       break;
565     }
566   }
567 //  if (fin){
568 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
569 //  }
570 //  if (fout){
571 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
572 //  }
573   PetscFunctionReturn(0);
574 }
575 
576 //EXTERN_C_BEGIN - Do we need this?
577 #undef __FUNCT__
578 #define __FUNCT__ "InputTransformFFT"
579 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
580 {
581   PetscErrorCode ierr;
582   PetscFunctionBegin;
583   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 //EXTERN_C_END - Do we need this?
587 /*
588       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
589   Input A, x, y
590   A - FFTW matrix
591   x - user data
592   Options Database Keys:
593 + -mat_fftw_plannerflags - set FFTW planner flags
594 
595    Level: intermediate
596 
597 */
598 
599 EXTERN_C_BEGIN
600 #undef __FUNCT__
601 #define __FUNCT__ "InputTransformFFT_FTTW"
602 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
603 {
604   PetscErrorCode ierr;
605   MPI_Comm       comm=((PetscObject)A)->comm;
606   Mat_FFT        *fft = (Mat_FFT*)A->data;
607   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
608   PetscInt       N=fft->N, N1, n1 ,NM;
609   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
610   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
611   PetscInt       i,j,k,rank,size;
612   ptrdiff_t      alloc_local,local_n0,local_0_start;
613   ptrdiff_t      local_n1,local_1_start;
614   VecScatter     vecscat;
615   IS             list1,list2;
616 
617   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
618   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
619   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
620   printf("Local ownership starts at %d\n",low);
621 
622  switch (ndim){
623  case 1:
624   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
625   break;
626  case 2:
627       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
628       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
629 
630       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
631       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
632       printf("Val local_0_start = %ld\n",local_0_start);
633 
634       if (dim[1]%2==0)
635         NM = dim[1]+2;
636       else
637         NM = dim[1]+1;
638 
639       for (i=0;i<local_n0;i++){
640          for (j=0;j<dim[1];j++){
641             tempindx = i*dim[1] + j;
642             tempindx1 = i*NM + j;
643             indx1[tempindx]=local_0_start*dim[1]+tempindx;
644             indx2[tempindx]=low+tempindx1;
645            // printf("Val tempindx1 = %d\n",tempindx1);
646   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
647   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
648   //          printf("-------------------------\n",indx2[tempindx],rank);
649         }
650      }
651 
652       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
653       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
654 
655       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
656       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
657       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
658       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
659       break;
660 
661  case 3:
662       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
663       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
664 
665       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
666       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
667       printf("Val local_0_start = %ld\n",local_0_start);
668 
669       if (dim[2]%2==0)
670         NM = dim[2]+2;
671       else
672         NM = dim[2]+1;
673 
674       for (i=0;i<local_n0;i++){
675          for (j=0;j<dim[1];j++){
676             for (k=0;k<dim[2];k++){
677                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
678                tempindx1 = i*dim[1]*NM + j*NM + k;
679                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
680                indx2[tempindx]=low+tempindx1;
681             }
682            // printf("Val tempindx1 = %d\n",tempindx1);
683            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
684            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
685            // printf("-------------------------\n",indx2[tempindx],rank);
686         }
687      }
688 
689       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
690       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
691 
692       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
693       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
694       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
696       break;
697 
698  default:
699   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
700   break;
701  }
702 
703  return 0;
704 }
705 EXTERN_C_END
706 
707 
708 //EXTERN_C_BEGIN - Do we need this?
709 #undef __FUNCT__
710 #define __FUNCT__ "OutputTransformFFT"
711 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
712 {
713   PetscErrorCode ierr;
714   PetscFunctionBegin;
715   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 //EXTERN_C_END - Do we need this?
719 /*
720       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
721   Input A, x, y
722   A - FFTW matrix
723   x - FFTW vector
724   y - PETSc vector that user can read
725   Options Database Keys:
726 + -mat_fftw_plannerflags - set FFTW planner flags
727 
728    Level: intermediate
729 
730 */
731 
732 EXTERN_C_BEGIN
733 #undef __FUNCT__
734 #define __FUNCT__ "OutputTransformFFT_FTTW"
735 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
736 {
737   PetscErrorCode ierr;
738   MPI_Comm       comm=((PetscObject)A)->comm;
739   Mat_FFT        *fft = (Mat_FFT*)A->data;
740   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
741   PetscInt       N=fft->N, N1, n1 ,NM;
742   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
743   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
744   PetscInt       i,j,k,rank,size;
745   ptrdiff_t      alloc_local,local_n0,local_0_start;
746   ptrdiff_t      local_n1,local_1_start;
747   VecScatter     vecscat;
748   IS             list1,list2;
749 
750   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
751   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
752   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
753   printf("Local ownership starts at %d\n",low);
754 
755  switch (ndim){
756  case 1:
757   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
758   break;
759  case 2:
760       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
761       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
762 
763      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
764      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
765      printf("Val local_0_start = %ld\n",local_0_start);
766 
767      if (dim[1]%2==0)
768       NM = dim[1]+2;
769     else
770       NM = dim[1]+1;
771 
772 
773 
774      for (i=0;i<local_n0;i++){
775         for (j=0;j<dim[1];j++){
776             tempindx = i*dim[1] + j;
777             tempindx1 = i*NM + j;
778             indx1[tempindx]=local_0_start*dim[1]+tempindx;
779             indx2[tempindx]=low+tempindx1;
780        //     printf("Val tempindx1 = %d\n",tempindx1);
781        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
782        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
783        //     printf("-------------------------\n",indx2[tempindx],rank);
784         }
785      }
786 
787      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
788      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
789 
790      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
791      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
793      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
794   break;
795 
796  case 3:
797       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
798       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
799 
800      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
801      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
802      printf("Val local_0_start = %ld\n",local_0_start);
803 
804      if (dim[2]%2==0)
805       NM = dim[2]+2;
806     else
807       NM = dim[2]+1;
808 
809      for (i=0;i<local_n0;i++){
810         for (j=0;j<dim[1];j++){
811            for (k=0;k<dim[2];k++){
812               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
813               tempindx1 = i*dim[1]*NM + j*NM + k;
814               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
815               indx2[tempindx]=low+tempindx1;
816            }
817         //    printf("Val tempindx1 = %d\n",tempindx1);
818         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
819         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
820         //    printf("-------------------------\n",indx2[tempindx],rank);
821         }
822      }
823 
824      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
825      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
826 
827      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
828      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
831   break;
832 
833  default:
834   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
835   break;
836  }
837 
838  return 0;
839 }
840 EXTERN_C_END
841 
842 
843 
844 
845 EXTERN_C_BEGIN
846 #undef __FUNCT__
847 #define __FUNCT__ "MatCreate_FFTW"
848 /*
849       MatCreate_FFTW - Creates a matrix object that provides FFT
850   via the external package FFTW
851   Options Database Keys:
852 + -mat_fftw_plannerflags - set FFTW planner flags
853 
854    Level: intermediate
855 
856 */
857 
858 PetscErrorCode MatCreate_FFTW(Mat A)
859 {
860   PetscErrorCode ierr;
861   MPI_Comm       comm=((PetscObject)A)->comm;
862   Mat_FFT        *fft=(Mat_FFT*)A->data;
863   Mat_FFTW       *fftw;
864   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
865   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
866   PetscBool      flg;
867   PetscInt       p_flag,partial_dim=1,ctr;
868   PetscMPIInt    size,rank;
869   ptrdiff_t      *pdim;
870   ptrdiff_t      local_n1,local_1_start;
871 
872   PetscFunctionBegin;
873 //#if !defined(PETSC_USE_COMPLEX)
874 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
875 //#endif
876 
877   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
878   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
879   ierr = MPI_Barrier(PETSC_COMM_WORLD);
880 
881   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
882   pdim[0] = dim[0];
883   for(ctr=1;ctr<ndim;ctr++)
884       {
885           partial_dim *= dim[ctr];
886           pdim[ctr] = dim[ctr];
887       }
888 //#if !defined(PETSC_USE_COMPLEX)
889 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
890 //#endif
891 
892 //  printf("partial dimension is %d",partial_dim);
893   if (size == 1) {
894     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
895     n = N;
896   } else {
897     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
898     switch (ndim){
899     case 1:
900 #if !defined(PETSC_USE_COMPLEX)
901   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
902 #endif
903       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
904       n = (PetscInt)local_n0;
905       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
906 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
907       break;
908     case 2:
909 #if defined(PETSC_USE_COMPLEX)
910       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
911       /*
912        PetscMPIInt    rank;
913        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
914        PetscSynchronizedFlush(comm);
915        */
916       n = (PetscInt)local_n0*dim[1];
917       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
918 #else
919       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
920       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
921       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
922 #endif
923       break;
924     case 3:
925 //      printf("The value of alloc local is %d",alloc_local);
926 #if defined(PETSC_USE_COMPLEX)
927       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
928       n = (PetscInt)local_n0*dim[1]*dim[2];
929       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
930 #else
931       printf("The code comes here\n");
932       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
933       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
934       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
935 #endif
936       break;
937     default:
938       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
939 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
940 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
941       n = (PetscInt)local_n0*partial_dim;
942 //      printf("New partial dimension is %d %d %d",n,N,ndim);
943       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
944       break;
945     }
946   }
947   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
948 
949   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
950   fft->data = (void*)fftw;
951 
952   fft->n           = n;
953   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
954   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
955   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
956 
957   fftw->p_forward  = 0;
958   fftw->p_backward = 0;
959   fftw->p_flag     = FFTW_ESTIMATE;
960 
961   if (size == 1){
962     A->ops->mult          = MatMult_SeqFFTW;
963     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
964   } else {
965     A->ops->mult          = MatMult_MPIFFTW;
966     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
967   }
968   fft->matdestroy          = MatDestroy_FFTW;
969   A->ops->getvecs       = MatGetVecs_FFTW;
970   A->assembled          = PETSC_TRUE;
971 #if !defined(PETSC_USE_COMPLEX)
972   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
973   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
974 #endif
975 
976   /* get runtime options */
977   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
978     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
979     if (flg) {fftw->p_flag = (unsigned)p_flag;}
980   PetscOptionsEnd();
981   PetscFunctionReturn(0);
982 }
983 EXTERN_C_END
984 
985 
986 
987 
988