xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision c92418cca540ef05b7510dbb1ac3ec1304f22103)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
15b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
18b2b77a04SHong Zhang } Mat_FFTW;
19b2b77a04SHong Zhang 
20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27b2b77a04SHong Zhang 
28b2b77a04SHong Zhang #undef __FUNCT__
29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31b2b77a04SHong Zhang {
32b2b77a04SHong Zhang   PetscErrorCode ierr;
33b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
36b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37b2b77a04SHong Zhang 
38b2b77a04SHong Zhang   PetscFunctionBegin;
39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
40b9ae062cSHong Zhang 
41b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42b2b77a04SHong Zhang #endif
43b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
46b2b77a04SHong Zhang     switch (ndim){
47b2b77a04SHong Zhang     case 1:
48b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49b2b77a04SHong Zhang       break;
50b2b77a04SHong Zhang     case 2:
51b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52b2b77a04SHong Zhang       break;
53b2b77a04SHong Zhang     case 3:
54b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55b2b77a04SHong Zhang       break;
56b2b77a04SHong Zhang     default:
57b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58b2b77a04SHong Zhang       break;
59b2b77a04SHong Zhang     }
60b2b77a04SHong Zhang     fftw->finarray  = x_array;
61b2b77a04SHong Zhang     fftw->foutarray = y_array;
62b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
65b2b77a04SHong Zhang   } else { /* use existing plan */
66b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68b2b77a04SHong Zhang     } else {
69b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
70b2b77a04SHong Zhang     }
71b2b77a04SHong Zhang   }
72b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74b2b77a04SHong Zhang   PetscFunctionReturn(0);
75b2b77a04SHong Zhang }
76b2b77a04SHong Zhang 
77b2b77a04SHong Zhang #undef __FUNCT__
78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80b2b77a04SHong Zhang {
81b2b77a04SHong Zhang   PetscErrorCode ierr;
82b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
83b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
85b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86b2b77a04SHong Zhang 
87b2b77a04SHong Zhang   PetscFunctionBegin;
88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
89b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90b2b77a04SHong Zhang #endif
91b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
94b2b77a04SHong Zhang     switch (ndim){
95b2b77a04SHong Zhang     case 1:
96b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97b2b77a04SHong Zhang       break;
98b2b77a04SHong Zhang     case 2:
99b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100b2b77a04SHong Zhang       break;
101b2b77a04SHong Zhang     case 3:
102b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103b2b77a04SHong Zhang       break;
104b2b77a04SHong Zhang     default:
105b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106b2b77a04SHong Zhang       break;
107b2b77a04SHong Zhang     }
108b2b77a04SHong Zhang     fftw->binarray  = x_array;
109b2b77a04SHong Zhang     fftw->boutarray = y_array;
110b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111b2b77a04SHong Zhang   } else { /* use existing plan */
112b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114b2b77a04SHong Zhang     } else {
115b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116b2b77a04SHong Zhang     }
117b2b77a04SHong Zhang   }
118b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120b2b77a04SHong Zhang   PetscFunctionReturn(0);
121b2b77a04SHong Zhang }
122b2b77a04SHong Zhang 
123b2b77a04SHong Zhang #undef __FUNCT__
124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126b2b77a04SHong Zhang {
127b2b77a04SHong Zhang   PetscErrorCode ierr;
128b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
131*c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
132b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
133*c92418ccSAmlan Barua // PetscInt ctr;
134*c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
135*c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
136*c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137*c92418ccSAmlan Barua 
138*c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
139*c92418ccSAmlan Barua //     {
140*c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
141*c92418ccSAmlan Barua //     }
142b2b77a04SHong Zhang 
143b2b77a04SHong Zhang   PetscFunctionBegin;
144b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
145b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
146b2b77a04SHong Zhang #endif
147*c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
148*c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
14911902ff2SHong Zhang 
150b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
151b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
152b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
153b2b77a04SHong Zhang     switch (ndim){
154b2b77a04SHong Zhang     case 1:
155b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
156b2b77a04SHong Zhang       break;
157b2b77a04SHong Zhang     case 2:
158b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
159b2b77a04SHong Zhang       break;
160b2b77a04SHong Zhang     case 3:
161b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
162b2b77a04SHong Zhang       break;
163b2b77a04SHong Zhang     default:
164*c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
16511902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
166b2b77a04SHong Zhang       break;
167b2b77a04SHong Zhang     }
168b2b77a04SHong Zhang     fftw->finarray  = x_array;
169b2b77a04SHong Zhang     fftw->foutarray = y_array;
170b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
171b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
172b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
173b2b77a04SHong Zhang   } else { /* use existing plan */
174b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
175b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
176b2b77a04SHong Zhang     } else {
177b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
178b2b77a04SHong Zhang     }
179b2b77a04SHong Zhang   }
180b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
181b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
182b2b77a04SHong Zhang   PetscFunctionReturn(0);
183b2b77a04SHong Zhang }
184b2b77a04SHong Zhang 
185b2b77a04SHong Zhang #undef __FUNCT__
186b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
187b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
188b2b77a04SHong Zhang {
189b2b77a04SHong Zhang   PetscErrorCode ierr;
190b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
191b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
192b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
193*c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
194b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
195*c92418ccSAmlan Barua //  PetscInt       ctr;
196*c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
197*c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
198*c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
199*c92418ccSAmlan Barua 
200*c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
201*c92418ccSAmlan Barua //     {
202*c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
203*c92418ccSAmlan Barua //     }
204b2b77a04SHong Zhang 
205b2b77a04SHong Zhang   PetscFunctionBegin;
206b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
207b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
208b2b77a04SHong Zhang #endif
209*c92418ccSAmlan Barua //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
210*c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW?
211*c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
21211902ff2SHong Zhang 
213b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
214b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
215b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
216b2b77a04SHong Zhang     switch (ndim){
217b2b77a04SHong Zhang     case 1:
218b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
219b2b77a04SHong Zhang       break;
220b2b77a04SHong Zhang     case 2:
221b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
222b2b77a04SHong Zhang       break;
223b2b77a04SHong Zhang     case 3:
224b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
225b2b77a04SHong Zhang       break;
226b2b77a04SHong Zhang     default:
227*c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
228*c92418ccSAmlan Barua //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
229b2b77a04SHong Zhang       break;
230b2b77a04SHong Zhang     }
231b2b77a04SHong Zhang     fftw->binarray  = x_array;
232b2b77a04SHong Zhang     fftw->boutarray = y_array;
233b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
234b2b77a04SHong Zhang   } else { /* use existing plan */
235b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
236b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
237b2b77a04SHong Zhang     } else {
238b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
239b2b77a04SHong Zhang     }
240b2b77a04SHong Zhang   }
241b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
242b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
243b2b77a04SHong Zhang   PetscFunctionReturn(0);
244b2b77a04SHong Zhang }
245b2b77a04SHong Zhang 
246b2b77a04SHong Zhang #undef __FUNCT__
247b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
248b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
249b2b77a04SHong Zhang {
250b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
251b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
252b2b77a04SHong Zhang   PetscErrorCode ierr;
253b2b77a04SHong Zhang 
254b2b77a04SHong Zhang   PetscFunctionBegin;
255b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
256b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
257b2b77a04SHong Zhang #endif
258b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
259b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
260bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
261b2b77a04SHong Zhang   PetscFunctionReturn(0);
262b2b77a04SHong Zhang }
263b2b77a04SHong Zhang 
264c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
265b2b77a04SHong Zhang #undef __FUNCT__
266b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
267b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
268b2b77a04SHong Zhang {
269b2b77a04SHong Zhang   PetscErrorCode  ierr;
270b2b77a04SHong Zhang   PetscScalar     *array;
271b2b77a04SHong Zhang 
272b2b77a04SHong Zhang   PetscFunctionBegin;
273b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
274b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
275b2b77a04SHong Zhang #endif
276b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
277b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
278b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
279b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
280b2b77a04SHong Zhang   PetscFunctionReturn(0);
281b2b77a04SHong Zhang }
282b2b77a04SHong Zhang 
283b2b77a04SHong Zhang #undef __FUNCT__
284*c92418ccSAmlan Barua #define __FUNCT__ "MatGetVecs_FFTW1D"
285*c92418ccSAmlan Barua /*
286*c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
287*c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
288*c92418ccSAmlan Barua 
289*c92418ccSAmlan Barua */
290*c92418ccSAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
291*c92418ccSAmlan Barua {
292*c92418ccSAmlan Barua   PetscErrorCode ierr;
293*c92418ccSAmlan Barua   PetscMPIInt    size,rank;
294*c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
295*c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
296*c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297*c92418ccSAmlan Barua   PetscInt       N=fft->N;
298*c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299*c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
300*c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
301*c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
302*c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
303*c92418ccSAmlan Barua   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
304*c92418ccSAmlan Barua 
305*c92418ccSAmlan Barua   PetscFunctionBegin;
306*c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
307*c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
308*c92418ccSAmlan Barua #endif
309*c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
310*c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
311*c92418ccSAmlan Barua   if (size == 1){
312*c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
313*c92418ccSAmlan Barua   }
314*c92418ccSAmlan Barua   else {
315*c92418ccSAmlan Barua       if (ndim>1){
316*c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
317*c92418ccSAmlan Barua       else {
318*c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
319*c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
320*c92418ccSAmlan Barua           if (fin) {
321*c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
322*c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
323*c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
324*c92418ccSAmlan Barua           }
325*c92418ccSAmlan Barua           if (fout) {
326*c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
327*c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
328*c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
329*c92418ccSAmlan Barua           }
330*c92418ccSAmlan Barua           if (bin) {
331*c92418ccSAmlan Barua             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
332*c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
333*c92418ccSAmlan Barua             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
334*c92418ccSAmlan Barua           }
335*c92418ccSAmlan Barua           if (bout) {
336*c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
337*c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
338*c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
339*c92418ccSAmlan Barua           }
340*c92418ccSAmlan Barua   }
341*c92418ccSAmlan Barua   if (fin){
342*c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
343*c92418ccSAmlan Barua   }
344*c92418ccSAmlan Barua   if (fout){
345*c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
346*c92418ccSAmlan Barua   }
347*c92418ccSAmlan Barua   if (bin){
348*c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
349*c92418ccSAmlan Barua   }
350*c92418ccSAmlan Barua   if (bout){
351*c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
352*c92418ccSAmlan Barua   }
353*c92418ccSAmlan Barua   PetscFunctionReturn(0);
354*c92418ccSAmlan Barua }
355*c92418ccSAmlan Barua 
356*c92418ccSAmlan Barua 
357*c92418ccSAmlan Barua 
358*c92418ccSAmlan Barua 
359*c92418ccSAmlan Barua 
360*c92418ccSAmlan Barua 
361*c92418ccSAmlan Barua 
362*c92418ccSAmlan Barua }
363*c92418ccSAmlan Barua #undef __FUNCT__
364b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
365b2b77a04SHong Zhang /*
366b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
367b2b77a04SHong Zhang      parallel layout determined by FFTW
368b2b77a04SHong Zhang 
369b2b77a04SHong Zhang    Collective on Mat
370b2b77a04SHong Zhang 
371b2b77a04SHong Zhang    Input Parameter:
372b2b77a04SHong Zhang .  mat - the matrix
373b2b77a04SHong Zhang 
374b2b77a04SHong Zhang    Output Parameter:
375b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
376b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
377b2b77a04SHong Zhang 
378b2b77a04SHong Zhang   Level: advanced
379b2b77a04SHong Zhang 
380b2b77a04SHong Zhang .seealso: MatCreateFFTW()
381b2b77a04SHong Zhang */
382b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
383b2b77a04SHong Zhang {
384b2b77a04SHong Zhang   PetscErrorCode ierr;
385b2b77a04SHong Zhang   PetscMPIInt    size,rank;
386b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
387b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
388*c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
389b2b77a04SHong Zhang   PetscInt       N=fft->N;
390b2b77a04SHong Zhang 
391b2b77a04SHong Zhang   PetscFunctionBegin;
392b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
393b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
394b2b77a04SHong Zhang #endif
395b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
396b2b77a04SHong Zhang   PetscValidType(A,1);
397b2b77a04SHong Zhang 
398b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
399b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
400b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
401b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
402b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
403b2b77a04SHong Zhang   } else {        /* mpi case */
404b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4056882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
406*c92418ccSAmlan Barua     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
407b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
408*c92418ccSAmlan Barua //    PetscInt ctr;
409*c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
410*c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
411*c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
41211902ff2SHong Zhang 
413*c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
414*c92418ccSAmlan Barua //        {
415*c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
416*c92418ccSAmlan Barua //       }
417b2b77a04SHong Zhang 
418b2b77a04SHong Zhang     switch (ndim){
419b2b77a04SHong Zhang     case 1:
4206882372aSHong Zhang       /* Get local size */
421*c92418ccSAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
422*c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4236882372aSHong Zhang 
4246882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4256882372aSHong Zhang       if (fin) {
4266882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4276882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4286882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4296882372aSHong Zhang       }
4306882372aSHong Zhang       if (fout) {
4316882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4326882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4336882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4346882372aSHong Zhang       }
435b2b77a04SHong Zhang       break;
436b2b77a04SHong Zhang     case 2:
437b2b77a04SHong Zhang       /* Get local size */
438b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
439b2b77a04SHong Zhang       if (fin) {
440b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
441b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
442b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
443b2b77a04SHong Zhang       }
444b2b77a04SHong Zhang       if (fout) {
445b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
446b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
447b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
448b2b77a04SHong Zhang       }
449b2b77a04SHong Zhang       break;
450b2b77a04SHong Zhang     case 3:
4516882372aSHong Zhang       /* Get local size */
4526882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
45311902ff2SHong Zhang //      printf("The quantity n is %d",n);
4546882372aSHong Zhang       if (fin) {
4556882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4566882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4576882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4586882372aSHong Zhang       }
4596882372aSHong Zhang       if (fout) {
4606882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4616882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4626882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4636882372aSHong Zhang       }
464b2b77a04SHong Zhang       break;
465b2b77a04SHong Zhang     default:
4666882372aSHong Zhang       /* Get local size */
467*c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
46811902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
46911902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
47011902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
47111902ff2SHong Zhang //      for(i=0;i<ndim;i++)
47211902ff2SHong Zhang //         {
47311902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
47411902ff2SHong Zhang //         }
47511902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
47611902ff2SHong Zhang //      printf("The quantity n is %d",n);
4776882372aSHong Zhang       if (fin) {
4786882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4796882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4806882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4816882372aSHong Zhang       }
4826882372aSHong Zhang       if (fout) {
4836882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4846882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4856882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4866882372aSHong Zhang       }
487b2b77a04SHong Zhang       break;
488b2b77a04SHong Zhang     }
489b2b77a04SHong Zhang   }
490fb7c10e0SHong Zhang   if (fin){
491262f75f9SHong Zhang     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
492fb7c10e0SHong Zhang   }
493fb7c10e0SHong Zhang   if (fout){
494262f75f9SHong Zhang     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
495fb7c10e0SHong Zhang   }
496b2b77a04SHong Zhang   PetscFunctionReturn(0);
497b2b77a04SHong Zhang }
498b2b77a04SHong Zhang 
499b2b77a04SHong Zhang EXTERN_C_BEGIN
500b2b77a04SHong Zhang #undef __FUNCT__
501b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW"
502b2b77a04SHong Zhang /*
503b2b77a04SHong Zhang       MatCreate_FFTW - Creates a matrix object that provides FFT
504b2b77a04SHong Zhang   via the external package FFTW
505b2b77a04SHong Zhang 
506b2b77a04SHong Zhang   Options Database Keys:
507b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
508b2b77a04SHong Zhang 
509b2b77a04SHong Zhang    Level: intermediate
510b2b77a04SHong Zhang 
511b2b77a04SHong Zhang */
512b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
513b2b77a04SHong Zhang {
514b2b77a04SHong Zhang   PetscErrorCode ierr;
515b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
516b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
517b2b77a04SHong Zhang   Mat_FFTW       *fftw;
518b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
519b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
520b2b77a04SHong Zhang   PetscBool      flg;
5216882372aSHong Zhang   PetscInt       p_flag,partial_dim=1,ctr;
52211902ff2SHong Zhang   PetscMPIInt    size,rank;
52311902ff2SHong Zhang   ptrdiff_t      *pdim;
524b2b77a04SHong Zhang 
525b2b77a04SHong Zhang   PetscFunctionBegin;
526b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
527b2b77a04SHong Zhang   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
528b2b77a04SHong Zhang #endif
529b2b77a04SHong Zhang 
530b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
53111902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
532*c92418ccSAmlan Barua 
53311902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
53411902ff2SHong Zhang   pdim[0] = dim[0];
53511902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
53611902ff2SHong Zhang       {
5376882372aSHong Zhang           partial_dim*= dim[ctr];
53811902ff2SHong Zhang           pdim[ctr] = dim[ctr];
5396882372aSHong Zhang       }
54011902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
541b2b77a04SHong Zhang   if (size == 1) {
542b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
543b2b77a04SHong Zhang     n = N;
544b2b77a04SHong Zhang   } else {
5456882372aSHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
546b2b77a04SHong Zhang     switch (ndim){
547b2b77a04SHong Zhang     case 1:
5486882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
5496882372aSHong Zhang       n = (PetscInt)local_n0;
5506882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
551*c92418ccSAmlan Barua  //     PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D);
552b2b77a04SHong Zhang       break;
553b2b77a04SHong Zhang     case 2:
554b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
555b2b77a04SHong Zhang       /*
556b2b77a04SHong Zhang        PetscMPIInt    rank;
557b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
558b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
559b2b77a04SHong Zhang        */
560b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
561b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
562b2b77a04SHong Zhang       break;
563b2b77a04SHong Zhang     case 3:
5646882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
56511902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
5666882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
5676882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
568b2b77a04SHong Zhang       break;
569b2b77a04SHong Zhang     default:
57011902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
571*c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
57211902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
5736882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
57411902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
5756882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
576b2b77a04SHong Zhang       break;
577b2b77a04SHong Zhang     }
578b2b77a04SHong Zhang   }
579b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
580b2b77a04SHong Zhang 
581b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
582b2b77a04SHong Zhang   fft->data = (void*)fftw;
583b2b77a04SHong Zhang 
584b2b77a04SHong Zhang   fft->n           = n;
585*c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
586*c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
587*c92418ccSAmlan Barua //  (fftw->dim_fftw)=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
588*c92418ccSAmlan Barua   for(ctr=0;ctr<ndim;ctr++)
589*c92418ccSAmlan Barua       {
590*c92418ccSAmlan Barua           (fftw->dim_fftw)[ctr]=dim[ctr];
591*c92418ccSAmlan Barua       }
592*c92418ccSAmlan Barua 
593b2b77a04SHong Zhang   fftw->p_forward  = 0;
594b2b77a04SHong Zhang   fftw->p_backward = 0;
595b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
596b2b77a04SHong Zhang 
597b2b77a04SHong Zhang   if (size == 1){
598b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
599b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
600b2b77a04SHong Zhang   } else {
601b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
602b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
603b2b77a04SHong Zhang   }
604b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
605b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
606b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
607b2b77a04SHong Zhang 
608b2b77a04SHong Zhang   /* get runtime options */
609b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
610b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
611b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
612b2b77a04SHong Zhang   PetscOptionsEnd();
613b2b77a04SHong Zhang   PetscFunctionReturn(0);
614b2b77a04SHong Zhang }
615b2b77a04SHong Zhang EXTERN_C_END
616