xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 3c3a9c753425a39a0d40385c0110b55c4bbacc67)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
15b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
18b2b77a04SHong Zhang } Mat_FFTW;
19b2b77a04SHong Zhang 
20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27b2b77a04SHong Zhang 
28b2b77a04SHong Zhang #undef __FUNCT__
29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31b2b77a04SHong Zhang {
32b2b77a04SHong Zhang   PetscErrorCode ierr;
33b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
36b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37b2b77a04SHong Zhang 
38b2b77a04SHong Zhang   PetscFunctionBegin;
39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
40b9ae062cSHong Zhang 
41b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42b2b77a04SHong Zhang #endif
43b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
46b2b77a04SHong Zhang     switch (ndim){
47b2b77a04SHong Zhang     case 1:
48b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49b2b77a04SHong Zhang       break;
50b2b77a04SHong Zhang     case 2:
51b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52b2b77a04SHong Zhang       break;
53b2b77a04SHong Zhang     case 3:
54b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55b2b77a04SHong Zhang       break;
56b2b77a04SHong Zhang     default:
57b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58b2b77a04SHong Zhang       break;
59b2b77a04SHong Zhang     }
60b2b77a04SHong Zhang     fftw->finarray  = x_array;
61b2b77a04SHong Zhang     fftw->foutarray = y_array;
62b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
65b2b77a04SHong Zhang   } else { /* use existing plan */
66b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68b2b77a04SHong Zhang     } else {
69b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
70b2b77a04SHong Zhang     }
71b2b77a04SHong Zhang   }
72b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74b2b77a04SHong Zhang   PetscFunctionReturn(0);
75b2b77a04SHong Zhang }
76b2b77a04SHong Zhang 
77b2b77a04SHong Zhang #undef __FUNCT__
78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80b2b77a04SHong Zhang {
81b2b77a04SHong Zhang   PetscErrorCode ierr;
82b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
83b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
85b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86b2b77a04SHong Zhang 
87b2b77a04SHong Zhang   PetscFunctionBegin;
88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
89b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90b2b77a04SHong Zhang #endif
91b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
94b2b77a04SHong Zhang     switch (ndim){
95b2b77a04SHong Zhang     case 1:
96b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97b2b77a04SHong Zhang       break;
98b2b77a04SHong Zhang     case 2:
99b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100b2b77a04SHong Zhang       break;
101b2b77a04SHong Zhang     case 3:
102b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103b2b77a04SHong Zhang       break;
104b2b77a04SHong Zhang     default:
105b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106b2b77a04SHong Zhang       break;
107b2b77a04SHong Zhang     }
108b2b77a04SHong Zhang     fftw->binarray  = x_array;
109b2b77a04SHong Zhang     fftw->boutarray = y_array;
110b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111b2b77a04SHong Zhang   } else { /* use existing plan */
112b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114b2b77a04SHong Zhang     } else {
115b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116b2b77a04SHong Zhang     }
117b2b77a04SHong Zhang   }
118b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120b2b77a04SHong Zhang   PetscFunctionReturn(0);
121b2b77a04SHong Zhang }
122b2b77a04SHong Zhang 
123b2b77a04SHong Zhang #undef __FUNCT__
124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126b2b77a04SHong Zhang {
127b2b77a04SHong Zhang   PetscErrorCode ierr;
128b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
131c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
132b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
133c92418ccSAmlan Barua // PetscInt ctr;
134c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
135c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
136c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137c92418ccSAmlan Barua 
138c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
139c92418ccSAmlan Barua //     {
140c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
141c92418ccSAmlan Barua //     }
142b2b77a04SHong Zhang 
143b2b77a04SHong Zhang   PetscFunctionBegin;
144b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
145b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
146b2b77a04SHong Zhang #endif
147c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
148c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
14911902ff2SHong Zhang 
150b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
151b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
152b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
153b2b77a04SHong Zhang     switch (ndim){
154b2b77a04SHong Zhang     case 1:
155*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
156b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
157*3c3a9c75SAmlan Barua #endif
158b2b77a04SHong Zhang       break;
159b2b77a04SHong Zhang     case 2:
160*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
161b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
162*3c3a9c75SAmlan Barua #else
163*3c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
164*3c3a9c75SAmlan Barua #endif
165b2b77a04SHong Zhang       break;
166b2b77a04SHong Zhang     case 3:
167*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169*3c3a9c75SAmlan Barua #else
170*3c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
171*3c3a9c75SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     default:
174*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
176*3c3a9c75SAmlan Barua #else
177*3c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
178*3c3a9c75SAmlan Barua #endif
17911902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
180b2b77a04SHong Zhang       break;
181b2b77a04SHong Zhang     }
182b2b77a04SHong Zhang     fftw->finarray  = x_array;
183b2b77a04SHong Zhang     fftw->foutarray = y_array;
184b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
185b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
186b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
187b2b77a04SHong Zhang   } else { /* use existing plan */
188b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
189b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
190b2b77a04SHong Zhang     } else {
191b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
192b2b77a04SHong Zhang     }
193b2b77a04SHong Zhang   }
194b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
195b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
196b2b77a04SHong Zhang   PetscFunctionReturn(0);
197b2b77a04SHong Zhang }
198b2b77a04SHong Zhang 
199b2b77a04SHong Zhang #undef __FUNCT__
200b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
201b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
202b2b77a04SHong Zhang {
203b2b77a04SHong Zhang   PetscErrorCode ierr;
204b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
205b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
206b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
207c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
208b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
209c92418ccSAmlan Barua //  PetscInt       ctr;
210c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
211c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
212c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
213c92418ccSAmlan Barua 
214c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
215c92418ccSAmlan Barua //     {
216c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
217c92418ccSAmlan Barua //     }
218b2b77a04SHong Zhang 
219b2b77a04SHong Zhang   PetscFunctionBegin;
220*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
221*3c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
222*3c3a9c75SAmlan Barua //#endif
223c92418ccSAmlan Barua //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
224c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW?
225c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
22611902ff2SHong Zhang 
227b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
228b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
229b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
230b2b77a04SHong Zhang     switch (ndim){
231b2b77a04SHong Zhang     case 1:
232*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
233b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
234*3c3a9c75SAmlan Barua #endif
235b2b77a04SHong Zhang       break;
236b2b77a04SHong Zhang     case 2:
237*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
238b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
239*3c3a9c75SAmlan Barua #else
240*3c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
241*3c3a9c75SAmlan Barua #endif
242b2b77a04SHong Zhang       break;
243b2b77a04SHong Zhang     case 3:
244*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
245b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
246*3c3a9c75SAmlan Barua #else
247*3c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
248*3c3a9c75SAmlan Barua #endif
249b2b77a04SHong Zhang       break;
250b2b77a04SHong Zhang     default:
251*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
252c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
253*3c3a9c75SAmlan Barua #else
254*3c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
255*3c3a9c75SAmlan Barua #endif
256c92418ccSAmlan Barua //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
257b2b77a04SHong Zhang       break;
258b2b77a04SHong Zhang     }
259b2b77a04SHong Zhang     fftw->binarray  = x_array;
260b2b77a04SHong Zhang     fftw->boutarray = y_array;
261b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
262b2b77a04SHong Zhang   } else { /* use existing plan */
263b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
264b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
265b2b77a04SHong Zhang     } else {
266b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
267b2b77a04SHong Zhang     }
268b2b77a04SHong Zhang   }
269b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
270b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
271b2b77a04SHong Zhang   PetscFunctionReturn(0);
272b2b77a04SHong Zhang }
273b2b77a04SHong Zhang 
274b2b77a04SHong Zhang #undef __FUNCT__
275b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
276b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
277b2b77a04SHong Zhang {
278b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
279b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
280b2b77a04SHong Zhang   PetscErrorCode ierr;
281b2b77a04SHong Zhang 
282b2b77a04SHong Zhang   PetscFunctionBegin;
283b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
284b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
285b2b77a04SHong Zhang #endif
286b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
287b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
288b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
289bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
290b2b77a04SHong Zhang   PetscFunctionReturn(0);
291b2b77a04SHong Zhang }
292b2b77a04SHong Zhang 
293c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
294b2b77a04SHong Zhang #undef __FUNCT__
295b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
296b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
297b2b77a04SHong Zhang {
298b2b77a04SHong Zhang   PetscErrorCode  ierr;
299b2b77a04SHong Zhang   PetscScalar     *array;
300b2b77a04SHong Zhang 
301b2b77a04SHong Zhang   PetscFunctionBegin;
302b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
303b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
304b2b77a04SHong Zhang #endif
305b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
306b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
307b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
309b2b77a04SHong Zhang   PetscFunctionReturn(0);
310b2b77a04SHong Zhang }
311b2b77a04SHong Zhang 
312b2b77a04SHong Zhang #undef __FUNCT__
313*3c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
314c92418ccSAmlan Barua /*
315c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
316c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
317c92418ccSAmlan Barua 
318c92418ccSAmlan Barua */
319c92418ccSAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
320c92418ccSAmlan Barua {
321c92418ccSAmlan Barua   PetscErrorCode ierr;
322c92418ccSAmlan Barua   PetscMPIInt    size,rank;
323c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
324c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
325c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
326c92418ccSAmlan Barua   PetscInt       N=fft->N;
327c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
328c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
329c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
330c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
331c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
332c92418ccSAmlan Barua   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
333c92418ccSAmlan Barua 
334c92418ccSAmlan Barua   PetscFunctionBegin;
335c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
336c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
337c92418ccSAmlan Barua #endif
338c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
339c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
340c92418ccSAmlan Barua   if (size == 1){
341c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
342c92418ccSAmlan Barua   }
343c92418ccSAmlan Barua   else {
344c92418ccSAmlan Barua       if (ndim>1){
345c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
346c92418ccSAmlan Barua       else {
347c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
348c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
349c92418ccSAmlan Barua           if (fin) {
350c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
351c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
352c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
353c92418ccSAmlan Barua           }
354c92418ccSAmlan Barua           if (fout) {
355c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
356c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
357c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
358c92418ccSAmlan Barua           }
359c92418ccSAmlan Barua           if (bin) {
360c92418ccSAmlan Barua             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
361c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
362c92418ccSAmlan Barua             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
363c92418ccSAmlan Barua           }
364c92418ccSAmlan Barua           if (bout) {
365c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
366c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
367c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
368c92418ccSAmlan Barua           }
369c92418ccSAmlan Barua   }
370c92418ccSAmlan Barua   if (fin){
371c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
372c92418ccSAmlan Barua   }
373c92418ccSAmlan Barua   if (fout){
374c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
375c92418ccSAmlan Barua   }
376c92418ccSAmlan Barua   if (bin){
377c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
378c92418ccSAmlan Barua   }
379c92418ccSAmlan Barua   if (bout){
380c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
381c92418ccSAmlan Barua   }
382c92418ccSAmlan Barua   PetscFunctionReturn(0);
383c92418ccSAmlan Barua }
384c92418ccSAmlan Barua 
385c92418ccSAmlan Barua 
386c92418ccSAmlan Barua }
387*3c3a9c75SAmlan Barua 
388c92418ccSAmlan Barua #undef __FUNCT__
389b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
390b2b77a04SHong Zhang /*
391b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
392b2b77a04SHong Zhang      parallel layout determined by FFTW
393b2b77a04SHong Zhang 
394b2b77a04SHong Zhang    Collective on Mat
395b2b77a04SHong Zhang 
396b2b77a04SHong Zhang    Input Parameter:
397b2b77a04SHong Zhang .  mat - the matrix
398b2b77a04SHong Zhang 
399b2b77a04SHong Zhang    Output Parameter:
400b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
401b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
402b2b77a04SHong Zhang 
403b2b77a04SHong Zhang   Level: advanced
404b2b77a04SHong Zhang 
405b2b77a04SHong Zhang .seealso: MatCreateFFTW()
406b2b77a04SHong Zhang */
407b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
408b2b77a04SHong Zhang {
409b2b77a04SHong Zhang   PetscErrorCode ierr;
410b2b77a04SHong Zhang   PetscMPIInt    size,rank;
411b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
412b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
413c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
414*3c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
415b2b77a04SHong Zhang 
416b2b77a04SHong Zhang   PetscFunctionBegin;
417*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
418*3c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
419*3c3a9c75SAmlan Barua //#endif
420b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
421b2b77a04SHong Zhang   PetscValidType(A,1);
422b2b77a04SHong Zhang 
423b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
424b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
425b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
426b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
427b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
428*3c3a9c75SAmlan Barua     printf("The code successfully comes at the end of the routine with one processor\n");
429b2b77a04SHong Zhang   } else {        /* mpi case */
430b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4316882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
432c92418ccSAmlan Barua     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
433b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
434*3c3a9c75SAmlan Barua     double         *data_finr, *data_foutr;
435*3c3a9c75SAmlan Barua     ptrdiff_t local_1_start;
436*3c3a9c75SAmlan Barua     PetscInt N1;
437c92418ccSAmlan Barua //    PetscInt ctr;
438c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
439c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
440c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
44111902ff2SHong Zhang 
442c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
443c92418ccSAmlan Barua //        {
444c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
445c92418ccSAmlan Barua //       }
446b2b77a04SHong Zhang 
447b2b77a04SHong Zhang     switch (ndim){
448b2b77a04SHong Zhang     case 1:
4496882372aSHong Zhang       /* Get local size */
450c92418ccSAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
451c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4526882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4536882372aSHong Zhang       if (fin) {
4546882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4556882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4566882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4576882372aSHong Zhang       }
4586882372aSHong Zhang       if (fout) {
4596882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4606882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4616882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4626882372aSHong Zhang       }
463b2b77a04SHong Zhang       break;
464b2b77a04SHong Zhang     case 2:
465*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
466*3c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
467*3c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
468*3c3a9c75SAmlan Barua       if (fin) {
469*3c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
470*3c3a9c75SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
471*3c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
472*3c3a9c75SAmlan Barua         printf("The code comes here with vector size %d\n",vsize);
473*3c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
474*3c3a9c75SAmlan Barua       }
475*3c3a9c75SAmlan Barua       if (fout) {
476*3c3a9c75SAmlan Barua         data_foutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
477*3c3a9c75SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_foutr,fout);CHKERRQ(ierr);
478*3c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
479*3c3a9c75SAmlan Barua       }
480*3c3a9c75SAmlan Barua       printf("The code successfully comes at the end of the routine %d, %d\n",n1,N1);
481*3c3a9c75SAmlan Barua 
482*3c3a9c75SAmlan Barua #else
483b2b77a04SHong Zhang       /* Get local size */
484b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
485b2b77a04SHong Zhang       if (fin) {
486b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
487b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
488b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
489b2b77a04SHong Zhang       }
490b2b77a04SHong Zhang       if (fout) {
491b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
493b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
494b2b77a04SHong Zhang       }
495*3c3a9c75SAmlan Barua      printf("Hope this does not come here");
496*3c3a9c75SAmlan Barua #endif
497b2b77a04SHong Zhang       break;
498b2b77a04SHong Zhang     case 3:
4996882372aSHong Zhang       /* Get local size */
500*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
501*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
502*3c3a9c75SAmlan Barua #else
5036882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
50411902ff2SHong Zhang //      printf("The quantity n is %d",n);
5056882372aSHong Zhang       if (fin) {
5066882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5076882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5086882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5096882372aSHong Zhang       }
5106882372aSHong Zhang       if (fout) {
5116882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5126882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5136882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5146882372aSHong Zhang       }
515*3c3a9c75SAmlan Barua #endif
516b2b77a04SHong Zhang       break;
517b2b77a04SHong Zhang     default:
5186882372aSHong Zhang       /* Get local size */
519*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
520*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
521*3c3a9c75SAmlan Barua #else
522c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
52311902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
52411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
52511902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
52611902ff2SHong Zhang //      for(i=0;i<ndim;i++)
52711902ff2SHong Zhang //         {
52811902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
52911902ff2SHong Zhang //         }
53011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
53111902ff2SHong Zhang //      printf("The quantity n is %d",n);
5326882372aSHong Zhang       if (fin) {
5336882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5346882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5356882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5366882372aSHong Zhang       }
5376882372aSHong Zhang       if (fout) {
5386882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5396882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5406882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5416882372aSHong Zhang       }
542*3c3a9c75SAmlan Barua #endif
543b2b77a04SHong Zhang       break;
544b2b77a04SHong Zhang     }
545b2b77a04SHong Zhang   }
546fb7c10e0SHong Zhang   if (fin){
547262f75f9SHong Zhang     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
548fb7c10e0SHong Zhang   }
549fb7c10e0SHong Zhang   if (fout){
550262f75f9SHong Zhang     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
551fb7c10e0SHong Zhang   }
552b2b77a04SHong Zhang   PetscFunctionReturn(0);
553b2b77a04SHong Zhang }
554b2b77a04SHong Zhang 
555*3c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this?
556b2b77a04SHong Zhang #undef __FUNCT__
557*3c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
558*3c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
559*3c3a9c75SAmlan Barua {
560*3c3a9c75SAmlan Barua   PetscErrorCode ierr;
561*3c3a9c75SAmlan Barua   PetscFunctionBegin;
562*3c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
563*3c3a9c75SAmlan Barua   PetscFunctionReturn(0);
564*3c3a9c75SAmlan Barua }
565*3c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this?
566b2b77a04SHong Zhang /*
567*3c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
568*3c3a9c75SAmlan Barua   Input A, x, y
569*3c3a9c75SAmlan Barua   A - FFTW matrix
570*3c3a9c75SAmlan Barua   x - user d
571b2b77a04SHong Zhang   Options Database Keys:
572b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
573b2b77a04SHong Zhang 
574b2b77a04SHong Zhang    Level: intermediate
575b2b77a04SHong Zhang 
576b2b77a04SHong Zhang */
577*3c3a9c75SAmlan Barua 
578*3c3a9c75SAmlan Barua EXTERN_C_BEGIN
579*3c3a9c75SAmlan Barua #undef __FUNCT__
580*3c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
581*3c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
582*3c3a9c75SAmlan Barua {
583*3c3a9c75SAmlan Barua   PetscErrorCode ierr;
584*3c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
585*3c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
586*3c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
587*3c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
588*3c3a9c75SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
589*3c3a9c75SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
590*3c3a9c75SAmlan Barua   PetscInt       i,j;
591*3c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
592*3c3a9c75SAmlan Barua   ptrdiff_t      local_n1,local_1_start;
593*3c3a9c75SAmlan Barua   VecScatter     vecscat;
594*3c3a9c75SAmlan Barua   IS             list1,list2;
595*3c3a9c75SAmlan Barua 
596*3c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
597*3c3a9c75SAmlan Barua 
598*3c3a9c75SAmlan Barua  switch (ndim){
599*3c3a9c75SAmlan Barua  case 1:
600*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
601*3c3a9c75SAmlan Barua   break;
602*3c3a9c75SAmlan Barua  case 2:
603*3c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
604*3c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
605*3c3a9c75SAmlan Barua 
606*3c3a9c75SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1,&indx1);CHKERRQ(ierr);
607*3c3a9c75SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1,&indx2);CHKERRQ(ierr);
608*3c3a9c75SAmlan Barua 
609*3c3a9c75SAmlan Barua      if (dim[1]%2==0)
610*3c3a9c75SAmlan Barua       NM = dim[1]+2;
611*3c3a9c75SAmlan Barua     else
612*3c3a9c75SAmlan Barua       NM = dim[1]+1;
613*3c3a9c75SAmlan Barua 
614*3c3a9c75SAmlan Barua      for (i=0;i<local_n0;i++){
615*3c3a9c75SAmlan Barua         for (j=0;j<dim[1];j++){
616*3c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
617*3c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
618*3c3a9c75SAmlan Barua             indx1[tempindx]=local_0_start*N1+tempindx;
619*3c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
620*3c3a9c75SAmlan Barua   //          printf("index3 %d from proc %d is \n",indx3[tempindx],rank);
621*3c3a9c75SAmlan Barua   //          printf("index4 %d from proc %d is \n",indx4[tempindx],rank);
622*3c3a9c75SAmlan Barua         }
623*3c3a9c75SAmlan Barua      }
624*3c3a9c75SAmlan Barua 
625*3c3a9c75SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
626*3c3a9c75SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
627*3c3a9c75SAmlan Barua 
628*3c3a9c75SAmlan Barua      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);
629*3c3a9c75SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
630*3c3a9c75SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
631*3c3a9c75SAmlan Barua      ierr = VecScatterDestroy(&vecscat);
632*3c3a9c75SAmlan Barua      break;
633*3c3a9c75SAmlan Barua 
634*3c3a9c75SAmlan Barua  case 3:
635*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
636*3c3a9c75SAmlan Barua   break;
637*3c3a9c75SAmlan Barua 
638*3c3a9c75SAmlan Barua  default:
639*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
640*3c3a9c75SAmlan Barua   break;
641*3c3a9c75SAmlan Barua  }
642*3c3a9c75SAmlan Barua 
643*3c3a9c75SAmlan Barua  return 0;
644*3c3a9c75SAmlan Barua }
645*3c3a9c75SAmlan Barua EXTERN_C_END
646*3c3a9c75SAmlan Barua 
647*3c3a9c75SAmlan Barua /*
648*3c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this?
649*3c3a9c75SAmlan Barua #undef __FUNCT__
650*3c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
651*3c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
652*3c3a9c75SAmlan Barua {
653*3c3a9c75SAmlan Barua   PetscErrorCode ierr;
654*3c3a9c75SAmlan Barua   PetscFunctionBegin;
655*3c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
656*3c3a9c75SAmlan Barua   PetscFunctionReturn(0);
657*3c3a9c75SAmlan Barua }
658*3c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this?
659*3c3a9c75SAmlan Barua */
660*3c3a9c75SAmlan Barua 
661*3c3a9c75SAmlan Barua EXTERN_C_BEGIN
662*3c3a9c75SAmlan Barua #undef __FUNCT__
663*3c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
664*3c3a9c75SAmlan Barua /*
665*3c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
666*3c3a9c75SAmlan Barua   via the external package FFTW
667*3c3a9c75SAmlan Barua   Options Database Keys:
668*3c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
669*3c3a9c75SAmlan Barua 
670*3c3a9c75SAmlan Barua    Level: intermediate
671*3c3a9c75SAmlan Barua 
672*3c3a9c75SAmlan Barua */
673*3c3a9c75SAmlan Barua 
674b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
675b2b77a04SHong Zhang {
676b2b77a04SHong Zhang   PetscErrorCode ierr;
677b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
678b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
679b2b77a04SHong Zhang   Mat_FFTW       *fftw;
680b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
681b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
682b2b77a04SHong Zhang   PetscBool      flg;
6836882372aSHong Zhang   PetscInt       p_flag,partial_dim=1,ctr;
68411902ff2SHong Zhang   PetscMPIInt    size,rank;
68511902ff2SHong Zhang   ptrdiff_t      *pdim;
686b2b77a04SHong Zhang 
687b2b77a04SHong Zhang   PetscFunctionBegin;
688*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
689*3c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
690*3c3a9c75SAmlan Barua //#endif
691b2b77a04SHong Zhang 
692b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
69311902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
694c92418ccSAmlan Barua 
69511902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
69611902ff2SHong Zhang   pdim[0] = dim[0];
69711902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
69811902ff2SHong Zhang       {
6996882372aSHong Zhang           partial_dim *= dim[ctr];
70011902ff2SHong Zhang           pdim[ctr] = dim[ctr];
7016882372aSHong Zhang       }
702*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
703*3c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
704*3c3a9c75SAmlan Barua //#endif
705*3c3a9c75SAmlan Barua 
70611902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
707b2b77a04SHong Zhang   if (size == 1) {
708b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
709b2b77a04SHong Zhang     n = N;
710b2b77a04SHong Zhang   } else {
7116882372aSHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
712b2b77a04SHong Zhang     switch (ndim){
713b2b77a04SHong Zhang     case 1:
714*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
715*3c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
716*3c3a9c75SAmlan Barua #endif
7176882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
7186882372aSHong Zhang       n = (PetscInt)local_n0;
7196882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
720*3c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
721b2b77a04SHong Zhang       break;
722b2b77a04SHong Zhang     case 2:
723b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
724b2b77a04SHong Zhang       /*
725b2b77a04SHong Zhang        PetscMPIInt    rank;
726b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
727b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
728b2b77a04SHong Zhang        */
729b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
730b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
731b2b77a04SHong Zhang       break;
732b2b77a04SHong Zhang     case 3:
73311902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
7346882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
7356882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
736b2b77a04SHong Zhang       break;
737b2b77a04SHong Zhang     default:
73811902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
739c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
74011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
7416882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
74211902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
7436882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
744b2b77a04SHong Zhang       break;
745b2b77a04SHong Zhang     }
746b2b77a04SHong Zhang   }
747b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
748b2b77a04SHong Zhang 
749b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
750b2b77a04SHong Zhang   fft->data = (void*)fftw;
751b2b77a04SHong Zhang 
752b2b77a04SHong Zhang   fft->n           = n;
753c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
754c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
755b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
756c92418ccSAmlan Barua 
757b2b77a04SHong Zhang   fftw->p_forward  = 0;
758b2b77a04SHong Zhang   fftw->p_backward = 0;
759b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
760b2b77a04SHong Zhang 
761b2b77a04SHong Zhang   if (size == 1){
762b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
763b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
764b2b77a04SHong Zhang   } else {
765b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
766b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
767b2b77a04SHong Zhang   }
768b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
769b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
770b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
771*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
772*3c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
773*3c3a9c75SAmlan Barua //  PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
774*3c3a9c75SAmlan Barua #endif
775b2b77a04SHong Zhang 
776b2b77a04SHong Zhang   /* get runtime options */
777b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
778b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
779b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
780b2b77a04SHong Zhang   PetscOptionsEnd();
781b2b77a04SHong Zhang   PetscFunctionReturn(0);
782b2b77a04SHong Zhang }
783b2b77a04SHong Zhang EXTERN_C_END
784*3c3a9c75SAmlan Barua 
785*3c3a9c75SAmlan Barua 
786*3c3a9c75SAmlan Barua 
787*3c3a9c75SAmlan Barua 
788