xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 5b3e41ffffdce1b7ff4408884893bcc5bc358f62)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
15b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
18b2b77a04SHong Zhang } Mat_FFTW;
19b2b77a04SHong Zhang 
20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27*5b3e41ffSAmlan Barua extern PetscErrorCode InputTransformFFT_FFTW(Mat,Vec,Vec);
28*5b3e41ffSAmlan Barua extern PetscErrorCode InputTransformFFT(Mat,Vec,Vec);
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang #undef __FUNCT__
31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33b2b77a04SHong Zhang {
34b2b77a04SHong Zhang   PetscErrorCode ierr;
35b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
38b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39b2b77a04SHong Zhang 
40b2b77a04SHong Zhang   PetscFunctionBegin;
41b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
42b9ae062cSHong Zhang 
43b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
44b2b77a04SHong Zhang #endif
45b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
46b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
47b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
48b2b77a04SHong Zhang     switch (ndim){
49b2b77a04SHong Zhang     case 1:
50b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51b2b77a04SHong Zhang       break;
52b2b77a04SHong Zhang     case 2:
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54b2b77a04SHong Zhang       break;
55b2b77a04SHong Zhang     case 3:
56b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     default:
59b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
60b2b77a04SHong Zhang       break;
61b2b77a04SHong Zhang     }
62b2b77a04SHong Zhang     fftw->finarray  = x_array;
63b2b77a04SHong Zhang     fftw->foutarray = y_array;
64b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
65b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
66b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
67b2b77a04SHong Zhang   } else { /* use existing plan */
68b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
69b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
70b2b77a04SHong Zhang     } else {
71b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang   }
74b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
75b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
76b2b77a04SHong Zhang   PetscFunctionReturn(0);
77b2b77a04SHong Zhang }
78b2b77a04SHong Zhang 
79b2b77a04SHong Zhang #undef __FUNCT__
80b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
81b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
82b2b77a04SHong Zhang {
83b2b77a04SHong Zhang   PetscErrorCode ierr;
84b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
85b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
86b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
87b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
88b2b77a04SHong Zhang 
89b2b77a04SHong Zhang   PetscFunctionBegin;
90b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
91b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
92b2b77a04SHong Zhang #endif
93b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
95b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
96b2b77a04SHong Zhang     switch (ndim){
97b2b77a04SHong Zhang     case 1:
98b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
99b2b77a04SHong Zhang       break;
100b2b77a04SHong Zhang     case 2:
101b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
102b2b77a04SHong Zhang       break;
103b2b77a04SHong Zhang     case 3:
104b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
105b2b77a04SHong Zhang       break;
106b2b77a04SHong Zhang     default:
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108b2b77a04SHong Zhang       break;
109b2b77a04SHong Zhang     }
110b2b77a04SHong Zhang     fftw->binarray  = x_array;
111b2b77a04SHong Zhang     fftw->boutarray = y_array;
112b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
113b2b77a04SHong Zhang   } else { /* use existing plan */
114b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
115b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
116b2b77a04SHong Zhang     } else {
117b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
118b2b77a04SHong Zhang     }
119b2b77a04SHong Zhang   }
120b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
121b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
122b2b77a04SHong Zhang   PetscFunctionReturn(0);
123b2b77a04SHong Zhang }
124b2b77a04SHong Zhang 
125b2b77a04SHong Zhang #undef __FUNCT__
126b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
127b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
128b2b77a04SHong Zhang {
129b2b77a04SHong Zhang   PetscErrorCode ierr;
130b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
131b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
132b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
133c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
134b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
135c92418ccSAmlan Barua // PetscInt ctr;
136c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
137c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
138c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
139c92418ccSAmlan Barua 
140c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
141c92418ccSAmlan Barua //     {
142c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
143c92418ccSAmlan Barua //     }
144b2b77a04SHong Zhang 
145b2b77a04SHong Zhang   PetscFunctionBegin;
146*5b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
147*5b3e41ffSAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
148*5b3e41ffSAmlan Barua //#endif
149c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
150c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
15111902ff2SHong Zhang 
152b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
153b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
154b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
155b2b77a04SHong Zhang     switch (ndim){
156b2b77a04SHong Zhang     case 1:
1573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
158b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1593c3a9c75SAmlan Barua #endif
160b2b77a04SHong Zhang       break;
161b2b77a04SHong Zhang     case 2:
1623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
163b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1643c3a9c75SAmlan Barua #else
165*5b3e41ffSAmlan Barua       printf("The code comes here \n");
1663c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1673c3a9c75SAmlan Barua #endif
168b2b77a04SHong Zhang       break;
169b2b77a04SHong Zhang     case 3:
1703c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1723c3a9c75SAmlan Barua #else
1733c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1743c3a9c75SAmlan Barua #endif
175b2b77a04SHong Zhang       break;
176b2b77a04SHong Zhang     default:
1773c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1793c3a9c75SAmlan Barua #else
1803c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1813c3a9c75SAmlan Barua #endif
18211902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183b2b77a04SHong Zhang       break;
184b2b77a04SHong Zhang     }
185b2b77a04SHong Zhang     fftw->finarray  = x_array;
186b2b77a04SHong Zhang     fftw->foutarray = y_array;
187b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
188b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
189b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
190b2b77a04SHong Zhang   } else { /* use existing plan */
191b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
192b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
193b2b77a04SHong Zhang     } else {
194b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
195b2b77a04SHong Zhang     }
196b2b77a04SHong Zhang   }
197b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
198b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
199b2b77a04SHong Zhang   PetscFunctionReturn(0);
200b2b77a04SHong Zhang }
201b2b77a04SHong Zhang 
202b2b77a04SHong Zhang #undef __FUNCT__
203b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
204b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
205b2b77a04SHong Zhang {
206b2b77a04SHong Zhang   PetscErrorCode ierr;
207b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
208b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
209b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
210c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
211b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
212c92418ccSAmlan Barua //  PetscInt       ctr;
213c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
214c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
215c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
216c92418ccSAmlan Barua 
217c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
218c92418ccSAmlan Barua //     {
219c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
220c92418ccSAmlan Barua //     }
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
2233c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
2243c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
2253c3a9c75SAmlan Barua //#endif
226c92418ccSAmlan Barua //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
227c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW?
228c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
22911902ff2SHong Zhang 
230b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
231b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
232b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
233b2b77a04SHong Zhang     switch (ndim){
234b2b77a04SHong Zhang     case 1:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #endif
238b2b77a04SHong Zhang       break;
239b2b77a04SHong Zhang     case 2:
2403c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
241b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2423c3a9c75SAmlan Barua #else
2433c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2443c3a9c75SAmlan Barua #endif
245b2b77a04SHong Zhang       break;
246b2b77a04SHong Zhang     case 3:
2473c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
248b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2493c3a9c75SAmlan Barua #else
2503c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2513c3a9c75SAmlan Barua #endif
252b2b77a04SHong Zhang       break;
253b2b77a04SHong Zhang     default:
2543c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
255c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2563c3a9c75SAmlan Barua #else
2573c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2583c3a9c75SAmlan Barua #endif
259c92418ccSAmlan Barua //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
260b2b77a04SHong Zhang       break;
261b2b77a04SHong Zhang     }
262b2b77a04SHong Zhang     fftw->binarray  = x_array;
263b2b77a04SHong Zhang     fftw->boutarray = y_array;
264b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265b2b77a04SHong Zhang   } else { /* use existing plan */
266b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
267b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
268b2b77a04SHong Zhang     } else {
269b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
270b2b77a04SHong Zhang     }
271b2b77a04SHong Zhang   }
272b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
273b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
274b2b77a04SHong Zhang   PetscFunctionReturn(0);
275b2b77a04SHong Zhang }
276b2b77a04SHong Zhang 
277b2b77a04SHong Zhang #undef __FUNCT__
278b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
279b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
280b2b77a04SHong Zhang {
281b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
282b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
283b2b77a04SHong Zhang   PetscErrorCode ierr;
284b2b77a04SHong Zhang 
285b2b77a04SHong Zhang   PetscFunctionBegin;
286b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
287b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
288b2b77a04SHong Zhang #endif
289b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
290b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
291b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
292bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
293b2b77a04SHong Zhang   PetscFunctionReturn(0);
294b2b77a04SHong Zhang }
295b2b77a04SHong Zhang 
296c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
297b2b77a04SHong Zhang #undef __FUNCT__
298b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
299b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
300b2b77a04SHong Zhang {
301b2b77a04SHong Zhang   PetscErrorCode  ierr;
302b2b77a04SHong Zhang   PetscScalar     *array;
303b2b77a04SHong Zhang 
304b2b77a04SHong Zhang   PetscFunctionBegin;
305b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
306b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
307b2b77a04SHong Zhang #endif
308b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
310b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
311b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
312b2b77a04SHong Zhang   PetscFunctionReturn(0);
313b2b77a04SHong Zhang }
314b2b77a04SHong Zhang 
315b2b77a04SHong Zhang #undef __FUNCT__
3163c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
317c92418ccSAmlan Barua /*
318c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
319c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
320c92418ccSAmlan Barua 
321c92418ccSAmlan Barua */
322c92418ccSAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
323c92418ccSAmlan Barua {
324c92418ccSAmlan Barua   PetscErrorCode ierr;
325c92418ccSAmlan Barua   PetscMPIInt    size,rank;
326c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
327c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
328c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
329c92418ccSAmlan Barua   PetscInt       N=fft->N;
330c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
331c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
332c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
333c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
334c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
335c92418ccSAmlan Barua   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
336c92418ccSAmlan Barua 
337c92418ccSAmlan Barua   PetscFunctionBegin;
338c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
339c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
340c92418ccSAmlan Barua #endif
341c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
342c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
343c92418ccSAmlan Barua   if (size == 1){
344c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
345c92418ccSAmlan Barua   }
346c92418ccSAmlan Barua   else {
347c92418ccSAmlan Barua       if (ndim>1){
348c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
349c92418ccSAmlan Barua       else {
350c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
351c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
352c92418ccSAmlan Barua           if (fin) {
353c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
354c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
355c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
356c92418ccSAmlan Barua           }
357c92418ccSAmlan Barua           if (fout) {
358c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
359c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
360c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
361c92418ccSAmlan Barua           }
362c92418ccSAmlan Barua           if (bin) {
363c92418ccSAmlan Barua             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
364c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
365c92418ccSAmlan Barua             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
366c92418ccSAmlan Barua           }
367c92418ccSAmlan Barua           if (bout) {
368c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
369c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
370c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
371c92418ccSAmlan Barua           }
372c92418ccSAmlan Barua   }
373c92418ccSAmlan Barua   if (fin){
374c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
375c92418ccSAmlan Barua   }
376c92418ccSAmlan Barua   if (fout){
377c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
378c92418ccSAmlan Barua   }
379c92418ccSAmlan Barua   if (bin){
380c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
381c92418ccSAmlan Barua   }
382c92418ccSAmlan Barua   if (bout){
383c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
384c92418ccSAmlan Barua   }
385c92418ccSAmlan Barua   PetscFunctionReturn(0);
386c92418ccSAmlan Barua }
387c92418ccSAmlan Barua 
388c92418ccSAmlan Barua 
389c92418ccSAmlan Barua }
3903c3a9c75SAmlan Barua 
391c92418ccSAmlan Barua #undef __FUNCT__
392b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
393b2b77a04SHong Zhang /*
394b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
395b2b77a04SHong Zhang      parallel layout determined by FFTW
396b2b77a04SHong Zhang 
397b2b77a04SHong Zhang    Collective on Mat
398b2b77a04SHong Zhang 
399b2b77a04SHong Zhang    Input Parameter:
400b2b77a04SHong Zhang .  mat - the matrix
401b2b77a04SHong Zhang 
402b2b77a04SHong Zhang    Output Parameter:
403b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
404b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
405b2b77a04SHong Zhang 
406b2b77a04SHong Zhang   Level: advanced
407b2b77a04SHong Zhang 
408b2b77a04SHong Zhang .seealso: MatCreateFFTW()
409b2b77a04SHong Zhang */
410b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
411b2b77a04SHong Zhang {
412b2b77a04SHong Zhang   PetscErrorCode ierr;
413b2b77a04SHong Zhang   PetscMPIInt    size,rank;
414b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
415b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
416c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4173c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
418b2b77a04SHong Zhang 
419b2b77a04SHong Zhang   PetscFunctionBegin;
4203c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
4213c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
4223c3a9c75SAmlan Barua //#endif
423b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
424b2b77a04SHong Zhang   PetscValidType(A,1);
425b2b77a04SHong Zhang 
426b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
427b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
428b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
429b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
430b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4313c3a9c75SAmlan Barua     printf("The code successfully comes at the end of the routine with one processor\n");
432b2b77a04SHong Zhang   } else {        /* mpi case */
433b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4346882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
435c92418ccSAmlan Barua     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
436b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
4373c3a9c75SAmlan Barua     double         *data_finr, *data_foutr;
4383c3a9c75SAmlan Barua     ptrdiff_t local_1_start;
439c92418ccSAmlan Barua //    PetscInt ctr;
440c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
441c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
442c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
44311902ff2SHong Zhang 
444c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
445f76f76beSAmlan Barua //        {k
446c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
447c92418ccSAmlan Barua //       }
448b2b77a04SHong Zhang 
449f76f76beSAmlan Barua 
450f76f76beSAmlan Barua 
451b2b77a04SHong Zhang     switch (ndim){
452b2b77a04SHong Zhang     case 1:
4536882372aSHong Zhang       /* Get local size */
454c92418ccSAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
455c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4566882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4576882372aSHong Zhang       if (fin) {
4586882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4596882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4606882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4616882372aSHong Zhang       }
4626882372aSHong Zhang       if (fout) {
4636882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4646882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4656882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4666882372aSHong Zhang       }
467b2b77a04SHong Zhang       break;
468b2b77a04SHong Zhang     case 2:
4693c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4703c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4713c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4723c3a9c75SAmlan Barua       if (fin) {
4733c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
47454dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4753c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
47609dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
4773c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4783c3a9c75SAmlan Barua       }
4793c3a9c75SAmlan Barua       if (fout) {
48009dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
48109dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4823c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4833c3a9c75SAmlan Barua       }
484f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
4853c3a9c75SAmlan Barua 
4863c3a9c75SAmlan Barua #else
487b2b77a04SHong Zhang       /* Get local size */
48809dd8a53SAmlan Barua      printf("Hope this does not come here");
489b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
490b2b77a04SHong Zhang       if (fin) {
491b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
493b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
494b2b77a04SHong Zhang       }
495b2b77a04SHong Zhang       if (fout) {
496b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
498b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
499b2b77a04SHong Zhang       }
5003c3a9c75SAmlan Barua      printf("Hope this does not come here");
5013c3a9c75SAmlan Barua #endif
502b2b77a04SHong Zhang       break;
503b2b77a04SHong Zhang     case 3:
5046882372aSHong Zhang       /* Get local size */
5053c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5063c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
5073c3a9c75SAmlan Barua #else
5086882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
50911902ff2SHong Zhang //      printf("The quantity n is %d",n);
5106882372aSHong Zhang       if (fin) {
5116882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5126882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5136882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5146882372aSHong Zhang       }
5156882372aSHong Zhang       if (fout) {
5166882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5176882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5186882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5196882372aSHong Zhang       }
5203c3a9c75SAmlan Barua #endif
521b2b77a04SHong Zhang       break;
522b2b77a04SHong Zhang     default:
5236882372aSHong Zhang       /* Get local size */
5243c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5253c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
5263c3a9c75SAmlan Barua #else
527c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
52811902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
52911902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
53011902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
53111902ff2SHong Zhang //      for(i=0;i<ndim;i++)
53211902ff2SHong Zhang //         {
53311902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
53411902ff2SHong Zhang //         }
53511902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
53611902ff2SHong Zhang //      printf("The quantity n is %d",n);
5376882372aSHong Zhang       if (fin) {
5386882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5396882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5406882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5416882372aSHong Zhang       }
5426882372aSHong Zhang       if (fout) {
5436882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5446882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5456882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5466882372aSHong Zhang       }
5473c3a9c75SAmlan Barua #endif
548b2b77a04SHong Zhang       break;
549b2b77a04SHong Zhang     }
550b2b77a04SHong Zhang   }
55154dd5118SAmlan Barua //  if (fin){
55254dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
55354dd5118SAmlan Barua //  }
55454dd5118SAmlan Barua //  if (fout){
55554dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
55654dd5118SAmlan Barua //  }
557b2b77a04SHong Zhang   PetscFunctionReturn(0);
558b2b77a04SHong Zhang }
559b2b77a04SHong Zhang 
5603c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this?
561b2b77a04SHong Zhang #undef __FUNCT__
5623c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
5633c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
5643c3a9c75SAmlan Barua {
5653c3a9c75SAmlan Barua   PetscErrorCode ierr;
5663c3a9c75SAmlan Barua   PetscFunctionBegin;
5673c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
5683c3a9c75SAmlan Barua   PetscFunctionReturn(0);
5693c3a9c75SAmlan Barua }
5703c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this?
571b2b77a04SHong Zhang /*
5723c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
5733c3a9c75SAmlan Barua   Input A, x, y
5743c3a9c75SAmlan Barua   A - FFTW matrix
57554dd5118SAmlan Barua   x - user data
576b2b77a04SHong Zhang   Options Database Keys:
577b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
578b2b77a04SHong Zhang 
579b2b77a04SHong Zhang    Level: intermediate
580b2b77a04SHong Zhang 
581b2b77a04SHong Zhang */
5823c3a9c75SAmlan Barua 
5833c3a9c75SAmlan Barua EXTERN_C_BEGIN
5843c3a9c75SAmlan Barua #undef __FUNCT__
5853c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
5863c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
5873c3a9c75SAmlan Barua {
5883c3a9c75SAmlan Barua   PetscErrorCode ierr;
5893c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
5903c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
5913c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
5923c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
5933c3a9c75SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
594f76f76beSAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
595f76f76beSAmlan Barua   PetscInt       i,j,rank,size;
5963c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
5973c3a9c75SAmlan Barua   ptrdiff_t      local_n1,local_1_start;
5983c3a9c75SAmlan Barua   VecScatter     vecscat;
5993c3a9c75SAmlan Barua   IS             list1,list2;
6003c3a9c75SAmlan Barua 
601f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
602f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6033c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
604f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
6053c3a9c75SAmlan Barua 
6063c3a9c75SAmlan Barua  switch (ndim){
6073c3a9c75SAmlan Barua  case 1:
6083c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
6093c3a9c75SAmlan Barua   break;
6103c3a9c75SAmlan Barua  case 2:
6113c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6123c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6133c3a9c75SAmlan Barua 
614*5b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
615*5b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
616*5b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
6173c3a9c75SAmlan Barua 
6183c3a9c75SAmlan Barua      if (dim[1]%2==0)
6193c3a9c75SAmlan Barua       NM = dim[1]+2;
6203c3a9c75SAmlan Barua     else
6213c3a9c75SAmlan Barua       NM = dim[1]+1;
6223c3a9c75SAmlan Barua 
6233c3a9c75SAmlan Barua      for (i=0;i<local_n0;i++){
6243c3a9c75SAmlan Barua         for (j=0;j<dim[1];j++){
6253c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
6263c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
627*5b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
6283c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
629*5b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
630*5b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
631*5b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
632*5b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
6333c3a9c75SAmlan Barua         }
6343c3a9c75SAmlan Barua      }
6353c3a9c75SAmlan Barua 
6363c3a9c75SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
6373c3a9c75SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
6383c3a9c75SAmlan Barua 
639f76f76beSAmlan Barua      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
640f76f76beSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
641f76f76beSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
642f76f76beSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
6433c3a9c75SAmlan Barua      break;
6443c3a9c75SAmlan Barua 
6453c3a9c75SAmlan Barua  case 3:
6463c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
6473c3a9c75SAmlan Barua   break;
6483c3a9c75SAmlan Barua 
6493c3a9c75SAmlan Barua  default:
6503c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
6513c3a9c75SAmlan Barua   break;
6523c3a9c75SAmlan Barua  }
6533c3a9c75SAmlan Barua 
6543c3a9c75SAmlan Barua  return 0;
6553c3a9c75SAmlan Barua }
6563c3a9c75SAmlan Barua EXTERN_C_END
6573c3a9c75SAmlan Barua 
658*5b3e41ffSAmlan Barua 
6593c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this?
6603c3a9c75SAmlan Barua #undef __FUNCT__
6613c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
6623c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
6633c3a9c75SAmlan Barua {
6643c3a9c75SAmlan Barua   PetscErrorCode ierr;
6653c3a9c75SAmlan Barua   PetscFunctionBegin;
6663c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6673c3a9c75SAmlan Barua   PetscFunctionReturn(0);
6683c3a9c75SAmlan Barua }
6693c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this?
670*5b3e41ffSAmlan Barua /*
671*5b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
672*5b3e41ffSAmlan Barua   Input A, x, y
673*5b3e41ffSAmlan Barua   A - FFTW matrix
674*5b3e41ffSAmlan Barua   x - FFTW vector
675*5b3e41ffSAmlan Barua   y - PETSc vector that user can read
676*5b3e41ffSAmlan Barua   Options Database Keys:
677*5b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
678*5b3e41ffSAmlan Barua 
679*5b3e41ffSAmlan Barua    Level: intermediate
680*5b3e41ffSAmlan Barua 
6813c3a9c75SAmlan Barua */
6823c3a9c75SAmlan Barua 
6833c3a9c75SAmlan Barua EXTERN_C_BEGIN
6843c3a9c75SAmlan Barua #undef __FUNCT__
685*5b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
686*5b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
687*5b3e41ffSAmlan Barua {
688*5b3e41ffSAmlan Barua   PetscErrorCode ierr;
689*5b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
690*5b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
691*5b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
692*5b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
693*5b3e41ffSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
694*5b3e41ffSAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
695*5b3e41ffSAmlan Barua   PetscInt       i,j,rank,size;
696*5b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
697*5b3e41ffSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
698*5b3e41ffSAmlan Barua   VecScatter     vecscat;
699*5b3e41ffSAmlan Barua   IS             list1,list2;
700*5b3e41ffSAmlan Barua 
701*5b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
702*5b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
703*5b3e41ffSAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
704*5b3e41ffSAmlan Barua   printf("Local ownership starts at %d\n",low);
705*5b3e41ffSAmlan Barua 
706*5b3e41ffSAmlan Barua  switch (ndim){
707*5b3e41ffSAmlan Barua  case 1:
708*5b3e41ffSAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
709*5b3e41ffSAmlan Barua   break;
710*5b3e41ffSAmlan Barua  case 2:
711*5b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
712*5b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
713*5b3e41ffSAmlan Barua 
714*5b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
715*5b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
716*5b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
717*5b3e41ffSAmlan Barua 
718*5b3e41ffSAmlan Barua      if (dim[1]%2==0)
719*5b3e41ffSAmlan Barua       NM = dim[1]+2;
720*5b3e41ffSAmlan Barua     else
721*5b3e41ffSAmlan Barua       NM = dim[1]+1;
722*5b3e41ffSAmlan Barua 
723*5b3e41ffSAmlan Barua 
724*5b3e41ffSAmlan Barua 
725*5b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
726*5b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
727*5b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
728*5b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
729*5b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
730*5b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
731*5b3e41ffSAmlan Barua             printf("Val tempindx1 = %d\n",tempindx1);
732*5b3e41ffSAmlan Barua             printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
733*5b3e41ffSAmlan Barua             printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
734*5b3e41ffSAmlan Barua             printf("-------------------------\n",indx2[tempindx],rank);
735*5b3e41ffSAmlan Barua         }
736*5b3e41ffSAmlan Barua      }
737*5b3e41ffSAmlan Barua 
738*5b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
739*5b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
740*5b3e41ffSAmlan Barua 
741*5b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
742*5b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
743*5b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744*5b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
745*5b3e41ffSAmlan Barua      break;
746*5b3e41ffSAmlan Barua 
747*5b3e41ffSAmlan Barua  case 3:
748*5b3e41ffSAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
749*5b3e41ffSAmlan Barua   break;
750*5b3e41ffSAmlan Barua 
751*5b3e41ffSAmlan Barua  default:
752*5b3e41ffSAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
753*5b3e41ffSAmlan Barua   break;
754*5b3e41ffSAmlan Barua  }
755*5b3e41ffSAmlan Barua 
756*5b3e41ffSAmlan Barua  return 0;
757*5b3e41ffSAmlan Barua }
758*5b3e41ffSAmlan Barua EXTERN_C_END
759*5b3e41ffSAmlan Barua 
760*5b3e41ffSAmlan Barua 
761*5b3e41ffSAmlan Barua 
762*5b3e41ffSAmlan Barua 
763*5b3e41ffSAmlan Barua EXTERN_C_BEGIN
764*5b3e41ffSAmlan Barua #undef __FUNCT__
7653c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
7663c3a9c75SAmlan Barua /*
7673c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
7683c3a9c75SAmlan Barua   via the external package FFTW
7693c3a9c75SAmlan Barua   Options Database Keys:
7703c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
7713c3a9c75SAmlan Barua 
7723c3a9c75SAmlan Barua    Level: intermediate
7733c3a9c75SAmlan Barua 
7743c3a9c75SAmlan Barua */
7753c3a9c75SAmlan Barua 
776b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
777b2b77a04SHong Zhang {
778b2b77a04SHong Zhang   PetscErrorCode ierr;
779b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
780b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
781b2b77a04SHong Zhang   Mat_FFTW       *fftw;
782b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
783b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
784b2b77a04SHong Zhang   PetscBool      flg;
7856882372aSHong Zhang   PetscInt       p_flag,partial_dim=1,ctr;
78611902ff2SHong Zhang   PetscMPIInt    size,rank;
78711902ff2SHong Zhang   ptrdiff_t      *pdim;
788*5b3e41ffSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
789b2b77a04SHong Zhang 
790b2b77a04SHong Zhang   PetscFunctionBegin;
7913c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
7923c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
7933c3a9c75SAmlan Barua //#endif
794b2b77a04SHong Zhang 
795b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
79611902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
79754dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
798c92418ccSAmlan Barua 
79911902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
80011902ff2SHong Zhang   pdim[0] = dim[0];
80111902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
80211902ff2SHong Zhang       {
8036882372aSHong Zhang           partial_dim *= dim[ctr];
80411902ff2SHong Zhang           pdim[ctr] = dim[ctr];
8056882372aSHong Zhang       }
8063c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
8073c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
8083c3a9c75SAmlan Barua //#endif
8093c3a9c75SAmlan Barua 
81011902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
811b2b77a04SHong Zhang   if (size == 1) {
812b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
813b2b77a04SHong Zhang     n = N;
814b2b77a04SHong Zhang   } else {
8156882372aSHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
816b2b77a04SHong Zhang     switch (ndim){
817b2b77a04SHong Zhang     case 1:
8183c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8193c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
8203c3a9c75SAmlan Barua #endif
8216882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
8226882372aSHong Zhang       n = (PetscInt)local_n0;
8236882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
8243c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
825b2b77a04SHong Zhang       break;
826b2b77a04SHong Zhang     case 2:
827*5b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
828b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
829b2b77a04SHong Zhang       /*
830b2b77a04SHong Zhang        PetscMPIInt    rank;
831b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
832b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
833b2b77a04SHong Zhang        */
834b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
835b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
836*5b3e41ffSAmlan Barua #else
837*5b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
838*5b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
839*5b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
840*5b3e41ffSAmlan Barua #endif
841b2b77a04SHong Zhang       break;
842b2b77a04SHong Zhang     case 3:
84311902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
8446882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
8456882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
846b2b77a04SHong Zhang       break;
847b2b77a04SHong Zhang     default:
84811902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
849c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
85011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
8516882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
85211902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
8536882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
854b2b77a04SHong Zhang       break;
855b2b77a04SHong Zhang     }
856b2b77a04SHong Zhang   }
857b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
858b2b77a04SHong Zhang 
859b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
860b2b77a04SHong Zhang   fft->data = (void*)fftw;
861b2b77a04SHong Zhang 
862b2b77a04SHong Zhang   fft->n           = n;
863c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
864c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
865b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
866c92418ccSAmlan Barua 
867b2b77a04SHong Zhang   fftw->p_forward  = 0;
868b2b77a04SHong Zhang   fftw->p_backward = 0;
869b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
870b2b77a04SHong Zhang 
871b2b77a04SHong Zhang   if (size == 1){
872b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
873b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
874b2b77a04SHong Zhang   } else {
875b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
876b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
877b2b77a04SHong Zhang   }
878b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
879b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
880b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
8813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8823c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
883*5b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
8843c3a9c75SAmlan Barua #endif
885b2b77a04SHong Zhang 
886b2b77a04SHong Zhang   /* get runtime options */
887b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
888b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
889b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
890b2b77a04SHong Zhang   PetscOptionsEnd();
891b2b77a04SHong Zhang   PetscFunctionReturn(0);
892b2b77a04SHong Zhang }
893b2b77a04SHong Zhang EXTERN_C_END
8943c3a9c75SAmlan Barua 
8953c3a9c75SAmlan Barua 
8963c3a9c75SAmlan Barua 
8973c3a9c75SAmlan Barua 
898