xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 64657d84d43e0896dae809bb9337e08748670383)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
3043c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
305c92418ccSAmlan Barua /*
306c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
307c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
308c92418ccSAmlan Barua 
309c92418ccSAmlan Barua */
3106a506ed8SAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
311c92418ccSAmlan Barua {
312c92418ccSAmlan Barua   PetscErrorCode ierr;
313c92418ccSAmlan Barua   PetscMPIInt    size,rank;
314c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
315c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
316c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317c92418ccSAmlan Barua   PetscInt       N=fft->N;
318c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
321c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
323ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
324c92418ccSAmlan Barua 
325c92418ccSAmlan Barua   PetscFunctionBegin;
326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
327c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328c92418ccSAmlan Barua #endif
329c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331c92418ccSAmlan Barua   if (size == 1){
332c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333c92418ccSAmlan Barua   }
334c92418ccSAmlan Barua   else {
335c92418ccSAmlan Barua       if (ndim>1){
336c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337c92418ccSAmlan Barua       else {
338c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340c92418ccSAmlan Barua           if (fin) {
341c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344c92418ccSAmlan Barua           }
345c92418ccSAmlan Barua           if (fout) {
346c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349c92418ccSAmlan Barua           }
350c92418ccSAmlan Barua           if (bout) {
351c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354c92418ccSAmlan Barua           }
355c92418ccSAmlan Barua   }
356c92418ccSAmlan Barua   if (fin){
357c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358c92418ccSAmlan Barua   }
359c92418ccSAmlan Barua   if (fout){
360c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361c92418ccSAmlan Barua   }
362c92418ccSAmlan Barua   if (bout){
363c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364c92418ccSAmlan Barua   }
365c92418ccSAmlan Barua   PetscFunctionReturn(0);
366c92418ccSAmlan Barua }
367c92418ccSAmlan Barua 
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua }
3703c3a9c75SAmlan Barua 
371c92418ccSAmlan Barua #undef __FUNCT__
372b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
373b2b77a04SHong Zhang /*
374b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
375b2b77a04SHong Zhang      parallel layout determined by FFTW
376b2b77a04SHong Zhang 
377b2b77a04SHong Zhang    Collective on Mat
378b2b77a04SHong Zhang 
379b2b77a04SHong Zhang    Input Parameter:
380b2b77a04SHong Zhang .  mat - the matrix
381b2b77a04SHong Zhang 
382b2b77a04SHong Zhang    Output Parameter:
383b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
384b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
385b2b77a04SHong Zhang 
386b2b77a04SHong Zhang   Level: advanced
387b2b77a04SHong Zhang 
388b2b77a04SHong Zhang .seealso: MatCreateFFTW()
389b2b77a04SHong Zhang */
390b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
391b2b77a04SHong Zhang {
392b2b77a04SHong Zhang   PetscErrorCode ierr;
393b2b77a04SHong Zhang   PetscMPIInt    size,rank;
394b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
395b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
396c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3973c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
398e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
399b2b77a04SHong Zhang 
400b2b77a04SHong Zhang   PetscFunctionBegin;
401b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
402b2b77a04SHong Zhang   PetscValidType(A,1);
403b2b77a04SHong Zhang 
404b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
405b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
406b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
407e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
408b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
409b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
410e5d7f247SAmlan Barua #else
411e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
412e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
413e81bb599SAmlan Barua #endif
414b2b77a04SHong Zhang   } else {        /* mpi case */
415b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4166882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
417b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
41851d1eed7SAmlan Barua     double         *data_finr ;
419b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
420c92418ccSAmlan Barua //    PetscInt ctr;
421c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
422c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
423c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
42411902ff2SHong Zhang 
425c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
426f76f76beSAmlan Barua //        {k
427c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
428c92418ccSAmlan Barua //       }
429b2b77a04SHong Zhang 
430f76f76beSAmlan Barua 
431f76f76beSAmlan Barua 
432b2b77a04SHong Zhang     switch (ndim){
433b2b77a04SHong Zhang     case 1:
4346882372aSHong Zhang       /* Get local size */
435e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
436c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4376882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4386882372aSHong Zhang       if (fin) {
4396882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4406882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4416882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4426882372aSHong Zhang       }
4436882372aSHong Zhang       if (fout) {
4446882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4456882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4466882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4476882372aSHong Zhang       }
448b2b77a04SHong Zhang       break;
449b2b77a04SHong Zhang     case 2:
4503c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4513c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4523c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4533c3a9c75SAmlan Barua       if (fin) {
4543c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
45554dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4563c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
45709dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
4583c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4593c3a9c75SAmlan Barua       }
4603c3a9c75SAmlan Barua       if (fout) {
46109dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
46209dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4633c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4643c3a9c75SAmlan Barua       }
465f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
4663c3a9c75SAmlan Barua 
4673c3a9c75SAmlan Barua #else
468b2b77a04SHong Zhang       /* Get local size */
469*64657d84SAmlan Barua      //printf("Hope this does not come here");
470b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
471b2b77a04SHong Zhang       if (fin) {
472b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
473b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
474b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
475b2b77a04SHong Zhang       }
476b2b77a04SHong Zhang       if (fout) {
477b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
478b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
479b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
480b2b77a04SHong Zhang       }
481*64657d84SAmlan Barua      //printf("Hope this does not come here");
4823c3a9c75SAmlan Barua #endif
483b2b77a04SHong Zhang       break;
484b2b77a04SHong Zhang     case 3:
4856882372aSHong Zhang       /* Get local size */
4863c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
48751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
48851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
48951d1eed7SAmlan Barua       if (fin) {
49051d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
49151d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
49251d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
49351d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
49451d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
49551d1eed7SAmlan Barua       }
49651d1eed7SAmlan Barua       if (fout) {
49751d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
49851d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
49951d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50051d1eed7SAmlan Barua       }
50151d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
50251d1eed7SAmlan Barua 
50351d1eed7SAmlan Barua 
5043c3a9c75SAmlan Barua #else
5056882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
50611902ff2SHong Zhang //      printf("The quantity n is %d",n);
5076882372aSHong Zhang       if (fin) {
5086882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5096882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5106882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5116882372aSHong Zhang       }
5126882372aSHong Zhang       if (fout) {
5136882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5146882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5156882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5166882372aSHong Zhang       }
5173c3a9c75SAmlan Barua #endif
518b2b77a04SHong Zhang       break;
519b2b77a04SHong Zhang     default:
5206882372aSHong Zhang       /* Get local size */
5213c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
522b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
523b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
524b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
525b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
526b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
527b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
528b3a17365SAmlan Barua       if (fin) {
529b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
530b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
531b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
532b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
533b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
534b3a17365SAmlan Barua       }
535b3a17365SAmlan Barua       if (fout) {
536b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
538b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539b3a17365SAmlan Barua       }
540b3a17365SAmlan Barua 
5413c3a9c75SAmlan Barua #else
542c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
54311902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
54411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
54511902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
54611902ff2SHong Zhang //      for(i=0;i<ndim;i++)
54711902ff2SHong Zhang //         {
54811902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
54911902ff2SHong Zhang //         }
55011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
55111902ff2SHong Zhang //      printf("The quantity n is %d",n);
5526882372aSHong Zhang       if (fin) {
5536882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5546882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5556882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5566882372aSHong Zhang       }
5576882372aSHong Zhang       if (fout) {
5586882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5596882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5606882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5616882372aSHong Zhang       }
5623c3a9c75SAmlan Barua #endif
563b2b77a04SHong Zhang       break;
564b2b77a04SHong Zhang     }
565b2b77a04SHong Zhang   }
56654dd5118SAmlan Barua //  if (fin){
56754dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
56854dd5118SAmlan Barua //  }
56954dd5118SAmlan Barua //  if (fout){
57054dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
57154dd5118SAmlan Barua //  }
572b2b77a04SHong Zhang   PetscFunctionReturn(0);
573b2b77a04SHong Zhang }
574b2b77a04SHong Zhang 
575b2b77a04SHong Zhang #undef __FUNCT__
5763c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
5773c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
5783c3a9c75SAmlan Barua {
5793c3a9c75SAmlan Barua   PetscErrorCode ierr;
5803c3a9c75SAmlan Barua   PetscFunctionBegin;
5813c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
5823c3a9c75SAmlan Barua   PetscFunctionReturn(0);
5833c3a9c75SAmlan Barua }
58454efbe56SHong Zhang 
585b2b77a04SHong Zhang /*
5863c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
5873c3a9c75SAmlan Barua   Input A, x, y
5883c3a9c75SAmlan Barua   A - FFTW matrix
58954dd5118SAmlan Barua   x - user data
590b2b77a04SHong Zhang   Options Database Keys:
591b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
592b2b77a04SHong Zhang 
593b2b77a04SHong Zhang    Level: intermediate
594b2b77a04SHong Zhang 
595b2b77a04SHong Zhang */
5963c3a9c75SAmlan Barua 
5973c3a9c75SAmlan Barua EXTERN_C_BEGIN
5983c3a9c75SAmlan Barua #undef __FUNCT__
5993c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
6003c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
6013c3a9c75SAmlan Barua {
6023c3a9c75SAmlan Barua   PetscErrorCode ierr;
6033c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6043c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6053c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6063c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
607b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
608b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
609e5d7f247SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
6103c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
611e5d7f247SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
6123c3a9c75SAmlan Barua   VecScatter     vecscat;
6133c3a9c75SAmlan Barua   IS             list1,list2;
6143c3a9c75SAmlan Barua 
615f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
616f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6173c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
618f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
6193c3a9c75SAmlan Barua 
620e81bb599SAmlan Barua   if (size==1)
621e81bb599SAmlan Barua     {
6227536937bSAmlan Barua /*     switch (ndim){
623e81bb599SAmlan Barua      case 1:
624e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
625e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
626e81bb599SAmlan Barua              {
627e81bb599SAmlan Barua               indx1[i] = i;
628e81bb599SAmlan Barua              }
629e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
630e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
631e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
632e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
633e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
634b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
635b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
636e81bb599SAmlan Barua      break;
637e81bb599SAmlan Barua 
638e81bb599SAmlan Barua      case 2:
639e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
640e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
641e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
642e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
643e81bb599SAmlan Barua              }
644e81bb599SAmlan Barua           }
645e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
646e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
647e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
648e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
649e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
650b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
651b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
652e81bb599SAmlan Barua           break;
653e81bb599SAmlan Barua      case 3:
654e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
655e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
656e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
657e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
658e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
659e81bb599SAmlan Barua                 }
660e81bb599SAmlan Barua              }
661e81bb599SAmlan Barua           }
662e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
663e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
664e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
665e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
666e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
667b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
668b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
669e81bb599SAmlan Barua           break;
670e81bb599SAmlan Barua      default:
6717536937bSAmlan Barua */
6726971673cSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
6736971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6746971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6756971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6766971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
677b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
6786971673cSAmlan Barua           //ierr = ISDestroy(list1);CHKERRQ(ierr);
6797536937bSAmlan Barua  //         break;
6807536937bSAmlan Barua   //    }
681e81bb599SAmlan Barua     }
682e81bb599SAmlan Barua 
683e81bb599SAmlan Barua  else{
684e81bb599SAmlan Barua 
6853c3a9c75SAmlan Barua  switch (ndim){
6863c3a9c75SAmlan Barua  case 1:
687*64657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
688*64657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
689*64657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
690*64657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
691*64657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
692*64657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
693*64657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
694*64657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695*64657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696*64657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
697*64657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
698*64657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
699*64657d84SAmlan Barua       break;
700*64657d84SAmlan Barua #else
701e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
702*64657d84SAmlan Barua #endif
7033c3a9c75SAmlan Barua   break;
7043c3a9c75SAmlan Barua  case 2:
705bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
706bd59e6c6SAmlan Barua       //PetscInt my_value;
707bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
708bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
709bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
710bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
711bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
712bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
713bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
714bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
716bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
717bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
718bd59e6c6SAmlan Barua       break;
719bd59e6c6SAmlan Barua #else
7203c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
7213c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7223c3a9c75SAmlan Barua 
7235b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
7245b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
7255b3e41ffSAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
7263c3a9c75SAmlan Barua 
7273c3a9c75SAmlan Barua       if (dim[1]%2==0)
7283c3a9c75SAmlan Barua         NM = dim[1]+2;
7293c3a9c75SAmlan Barua       else
7303c3a9c75SAmlan Barua         NM = dim[1]+1;
7313c3a9c75SAmlan Barua 
7323c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
7333c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
7343c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
7353c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
7365b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
7373c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
7385b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
7395b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
7405b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
7415b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
7423c3a9c75SAmlan Barua         }
7433c3a9c75SAmlan Barua      }
7443c3a9c75SAmlan Barua 
7453c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7463c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7473c3a9c75SAmlan Barua 
748f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
749f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
750f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
751f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
752b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
753b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
754b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
755b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
7563c3a9c75SAmlan Barua       break;
757bd59e6c6SAmlan Barua #endif
7583c3a9c75SAmlan Barua 
7593c3a9c75SAmlan Barua  case 3:
760bd59e6c6SAmlan Barua       #if defined(PETSC_USE_COMPLEX)
761bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
762bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
763bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
764bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
765bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
766bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
767bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
768bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
769bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
770bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
771bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
772bd59e6c6SAmlan Barua       break;
773bd59e6c6SAmlan Barua #else
77451d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
77551d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
77651d1eed7SAmlan Barua 
77751d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
77851d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
77951d1eed7SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
78051d1eed7SAmlan Barua 
78151d1eed7SAmlan Barua       if (dim[2]%2==0)
78251d1eed7SAmlan Barua         NM = dim[2]+2;
78351d1eed7SAmlan Barua       else
78451d1eed7SAmlan Barua         NM = dim[2]+1;
78551d1eed7SAmlan Barua 
78651d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
78751d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
78851d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
78951d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
79051d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
79151d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
79251d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
79351d1eed7SAmlan Barua             }
79451d1eed7SAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
79551d1eed7SAmlan Barua            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
79651d1eed7SAmlan Barua            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
79751d1eed7SAmlan Barua            // printf("-------------------------\n",indx2[tempindx],rank);
79851d1eed7SAmlan Barua         }
79951d1eed7SAmlan Barua      }
80051d1eed7SAmlan Barua 
80151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
80251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
80351d1eed7SAmlan Barua 
80451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
80551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
80751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
808b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
809b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
810b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
811b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
8123c3a9c75SAmlan Barua       break;
813bd59e6c6SAmlan Barua #endif
8143c3a9c75SAmlan Barua 
8153c3a9c75SAmlan Barua  default:
816bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
817bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
818bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
819bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
820bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
821bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
822bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
823bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
826bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
827bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
828bd59e6c6SAmlan Barua 
829bd59e6c6SAmlan Barua 
830bd59e6c6SAmlan Barua #else
831e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
832e5d7f247SAmlan Barua       printf("The value of temp is %ld\n",temp);
833e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
834e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
835e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
836e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
837e5d7f247SAmlan Barua 
838e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
839e5d7f247SAmlan Barua       printf("The value of partial dim is %d\n",partial_dim);
840e5d7f247SAmlan Barua 
841e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
842e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
843e5d7f247SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
844e5d7f247SAmlan Barua 
845e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
846ba6e06d0SAmlan Barua         NM = 2;
847e5d7f247SAmlan Barua       else
848ba6e06d0SAmlan Barua         NM = 1;
849e5d7f247SAmlan Barua 
8506971673cSAmlan Barua       j = low;
8516971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
8526971673cSAmlan Barua          {
8536971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
8546971673cSAmlan Barua           indx2[i] = j;
855ba6e06d0SAmlan Barua           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
8566971673cSAmlan Barua           if (k%dim[ndim-1]==0)
8576971673cSAmlan Barua             { j+=NM;}
8586971673cSAmlan Barua           j++;
8596971673cSAmlan Barua          }
8606971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8616971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8626971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8636971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8646971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8656971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
866b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
867b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
868b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
869b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
8703c3a9c75SAmlan Barua       break;
871bd59e6c6SAmlan Barua #endif
8723c3a9c75SAmlan Barua   }
873e81bb599SAmlan Barua  }
8743c3a9c75SAmlan Barua 
8753c3a9c75SAmlan Barua  return 0;
8763c3a9c75SAmlan Barua }
8773c3a9c75SAmlan Barua EXTERN_C_END
8783c3a9c75SAmlan Barua 
8793c3a9c75SAmlan Barua #undef __FUNCT__
8803c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
8813c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
8823c3a9c75SAmlan Barua {
8833c3a9c75SAmlan Barua   PetscErrorCode ierr;
8843c3a9c75SAmlan Barua   PetscFunctionBegin;
8853c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8863c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8873c3a9c75SAmlan Barua }
88854efbe56SHong Zhang 
8895b3e41ffSAmlan Barua /*
8905b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
8915b3e41ffSAmlan Barua   Input A, x, y
8925b3e41ffSAmlan Barua   A - FFTW matrix
8935b3e41ffSAmlan Barua   x - FFTW vector
8945b3e41ffSAmlan Barua   y - PETSc vector that user can read
8955b3e41ffSAmlan Barua   Options Database Keys:
8965b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
8975b3e41ffSAmlan Barua 
8985b3e41ffSAmlan Barua    Level: intermediate
8995b3e41ffSAmlan Barua 
9003c3a9c75SAmlan Barua */
9013c3a9c75SAmlan Barua 
9023c3a9c75SAmlan Barua EXTERN_C_BEGIN
9033c3a9c75SAmlan Barua #undef __FUNCT__
9045b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
9055b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
9065b3e41ffSAmlan Barua {
9075b3e41ffSAmlan Barua   PetscErrorCode ierr;
9085b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
9095b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
9105b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9115b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
912b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
913b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
914ba6e06d0SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
9155b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
916ba6e06d0SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
9175b3e41ffSAmlan Barua   VecScatter     vecscat;
9185b3e41ffSAmlan Barua   IS             list1,list2;
9195b3e41ffSAmlan Barua 
9205b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9215b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
922cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
9235b3e41ffSAmlan Barua 
924e81bb599SAmlan Barua   if (size==1){
9257536937bSAmlan Barua /*
9265b3e41ffSAmlan Barua     switch (ndim){
9275b3e41ffSAmlan Barua     case 1:
928e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
929e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
930e81bb599SAmlan Barua              {
931e81bb599SAmlan Barua               indx1[i] = i;
932e81bb599SAmlan Barua              }
933e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
934e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
935e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
938b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
939b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
940e81bb599SAmlan Barua           break;
941e81bb599SAmlan Barua 
942e81bb599SAmlan Barua     case 2:
943e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
944e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
945e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
946e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
947e81bb599SAmlan Barua              }
948e81bb599SAmlan Barua           }
949e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
950e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
951e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
954b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
955b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
956e81bb599SAmlan Barua          break;
957e81bb599SAmlan Barua     case 3:
958e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
959e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
960e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
961e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
962e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
963e81bb599SAmlan Barua                }
964e81bb599SAmlan Barua             }
965e81bb599SAmlan Barua          }
966e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
967e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
968e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
971b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
972b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
973e81bb599SAmlan Barua          break;
974e81bb599SAmlan Barua     default:
9757536937bSAmlan Barua */
9766971673cSAmlan Barua          ierr = ISCreateStride(comm,N,0,1,&list1);
9776971673cSAmlan Barua          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
9786971673cSAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9796971673cSAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9806971673cSAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9816971673cSAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
982b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
9837536937bSAmlan Barua   //       break;
9847536937bSAmlan Barua    // }
985e81bb599SAmlan Barua   }
986e81bb599SAmlan Barua   else{
987e81bb599SAmlan Barua 
988e81bb599SAmlan Barua  switch (ndim){
989e81bb599SAmlan Barua  case 1:
990*64657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
991*64657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
992*64657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
993*64657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
994*64657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
995*64657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
996*64657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
997*64657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998*64657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999*64657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1000*64657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1001*64657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1002*64657d84SAmlan Barua       break;
1003*64657d84SAmlan Barua 
1004*64657d84SAmlan Barua #else
10056a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1006*64657d84SAmlan Barua #endif
10075b3e41ffSAmlan Barua   break;
10085b3e41ffSAmlan Barua  case 2:
1009bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1010bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1011bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1012bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1013bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1014bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1017bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1018bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1019bd59e6c6SAmlan Barua       break;
1020bd59e6c6SAmlan Barua #else
10215b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
10225b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
10235b3e41ffSAmlan Barua 
10245b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
10255b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
10265b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
10275b3e41ffSAmlan Barua 
10285b3e41ffSAmlan Barua      if (dim[1]%2==0)
10295b3e41ffSAmlan Barua       NM = dim[1]+2;
10305b3e41ffSAmlan Barua     else
10315b3e41ffSAmlan Barua       NM = dim[1]+1;
10325b3e41ffSAmlan Barua 
10335b3e41ffSAmlan Barua 
10345b3e41ffSAmlan Barua 
10355b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
10365b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
10375b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
10385b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
10395b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
10405b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
1041cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
1042cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1043cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1044cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
10455b3e41ffSAmlan Barua         }
10465b3e41ffSAmlan Barua      }
10475b3e41ffSAmlan Barua 
10485b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10495b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10505b3e41ffSAmlan Barua 
10515b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10525b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10535b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10545b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1058b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
10595b3e41ffSAmlan Barua   break;
1060bd59e6c6SAmlan Barua #endif
10615b3e41ffSAmlan Barua  case 3:
1062bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1063bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1064bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1065bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1066bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1067bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1068bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1069bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1072bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1073bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1074bd59e6c6SAmlan Barua       break;
1075bd59e6c6SAmlan Barua #else
1076bd59e6c6SAmlan Barua 
107751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
107851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
107951d1eed7SAmlan Barua 
108051d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
108151d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
108251d1eed7SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
108351d1eed7SAmlan Barua 
108451d1eed7SAmlan Barua      if (dim[2]%2==0)
108551d1eed7SAmlan Barua       NM = dim[2]+2;
108651d1eed7SAmlan Barua     else
108751d1eed7SAmlan Barua       NM = dim[2]+1;
108851d1eed7SAmlan Barua 
108951d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
109051d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
109151d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
109251d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
109351d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
109451d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
109551d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
109651d1eed7SAmlan Barua            }
109751d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
109851d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
109951d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
110051d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
110151d1eed7SAmlan Barua         }
110251d1eed7SAmlan Barua      }
110351d1eed7SAmlan Barua 
110451d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
110551d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
110651d1eed7SAmlan Barua 
110751d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
110851d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
110951d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
111051d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1111b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1112b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1113b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1114b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
11155b3e41ffSAmlan Barua   break;
1116bd59e6c6SAmlan Barua #endif
11175b3e41ffSAmlan Barua   default:
1118bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1119bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1120bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1121bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1122bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1123bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1124bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1125bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1128bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1129bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1130bd59e6c6SAmlan Barua #else
1131ba6e06d0SAmlan Barua      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1132ba6e06d0SAmlan Barua      printf("The value of temp is %ld\n",temp);
1133ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1134ba6e06d0SAmlan Barua      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1135ba6e06d0SAmlan Barua      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1136ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1137ba6e06d0SAmlan Barua 
1138ba6e06d0SAmlan Barua      partial_dim = fftw->partial_dim;
1139ba6e06d0SAmlan Barua      printf("The value of partial dim is %d\n",partial_dim);
1140ba6e06d0SAmlan Barua 
1141ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1142ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1143ba6e06d0SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
1144ba6e06d0SAmlan Barua 
1145ba6e06d0SAmlan Barua      if (dim[ndim-1]%2==0)
1146ba6e06d0SAmlan Barua        NM = 2;
1147ba6e06d0SAmlan Barua      else
1148ba6e06d0SAmlan Barua        NM = 1;
1149ba6e06d0SAmlan Barua 
1150ba6e06d0SAmlan Barua      j = low;
1151ba6e06d0SAmlan Barua      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1152ba6e06d0SAmlan Barua         {
1153ba6e06d0SAmlan Barua          indx1[i] = local_0_start*partial_dim + i;
1154ba6e06d0SAmlan Barua          indx2[i] = j;
1155ba6e06d0SAmlan Barua          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1156ba6e06d0SAmlan Barua          if (k%dim[ndim-1]==0)
1157ba6e06d0SAmlan Barua            { j+=NM;}
1158ba6e06d0SAmlan Barua          j++;
1159ba6e06d0SAmlan Barua         }
1160ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1161ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1162ba6e06d0SAmlan Barua 
1163ba6e06d0SAmlan Barua       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1164ba6e06d0SAmlan Barua 
1165ba6e06d0SAmlan Barua 
1166ba6e06d0SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1167ba6e06d0SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168ba6e06d0SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169ba6e06d0SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1170b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1171b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1172b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1173b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1174ba6e06d0SAmlan Barua 
11755b3e41ffSAmlan Barua      break;
1176bd59e6c6SAmlan Barua #endif
11775b3e41ffSAmlan Barua  }
1178e81bb599SAmlan Barua  }
11795b3e41ffSAmlan Barua  return 0;
11805b3e41ffSAmlan Barua }
11815b3e41ffSAmlan Barua EXTERN_C_END
11825b3e41ffSAmlan Barua 
11835b3e41ffSAmlan Barua EXTERN_C_BEGIN
11845b3e41ffSAmlan Barua #undef __FUNCT__
11853c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11863c3a9c75SAmlan Barua /*
11873c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
11883c3a9c75SAmlan Barua   via the external package FFTW
11893c3a9c75SAmlan Barua   Options Database Keys:
11903c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11913c3a9c75SAmlan Barua 
11923c3a9c75SAmlan Barua    Level: intermediate
11933c3a9c75SAmlan Barua 
11943c3a9c75SAmlan Barua */
11953c3a9c75SAmlan Barua 
1196b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1197b2b77a04SHong Zhang {
1198b2b77a04SHong Zhang   PetscErrorCode ierr;
1199b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1200b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1201b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1202b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1203b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1204b2b77a04SHong Zhang   PetscBool      flg;
1205b3a17365SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,N1;
120611902ff2SHong Zhang   PetscMPIInt    size,rank;
1207b3a17365SAmlan Barua   ptrdiff_t      *pdim, temp;
12084a16a297SSean Farley   ptrdiff_t      local_n1,local_1_start,local_1_end;
1209b2b77a04SHong Zhang 
1210b2b77a04SHong Zhang   PetscFunctionBegin;
1211b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
121211902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
121354dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1214c92418ccSAmlan Barua 
121511902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
121611902ff2SHong Zhang   pdim[0] = dim[0];
121711902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
121811902ff2SHong Zhang       {
12196882372aSHong Zhang           partial_dim *= dim[ctr];
122011902ff2SHong Zhang           pdim[ctr] = dim[ctr];
12216882372aSHong Zhang       }
12223c3a9c75SAmlan Barua 
1223b2b77a04SHong Zhang   if (size == 1) {
1224e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1225b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1226b2b77a04SHong Zhang     n = N;
1227e81bb599SAmlan Barua #else
1228e81bb599SAmlan Barua     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1229e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1230e81bb599SAmlan Barua     n = tot_dim;
1231e81bb599SAmlan Barua #endif
1232e81bb599SAmlan Barua 
1233b2b77a04SHong Zhang   } else {
1234b223cc97SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1235b2b77a04SHong Zhang     switch (ndim){
1236b2b77a04SHong Zhang     case 1:
12373c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12383c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1239e5d7f247SAmlan Barua #else
12406882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
12416882372aSHong Zhang       n = (PetscInt)local_n0;
12426882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1243e5d7f247SAmlan Barua #endif
12443c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1245b2b77a04SHong Zhang       break;
1246b2b77a04SHong Zhang     case 2:
12475b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1248b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1249b2b77a04SHong Zhang       /*
1250b2b77a04SHong Zhang        PetscMPIInt    rank;
1251b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1252b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1253b2b77a04SHong Zhang        */
1254b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1255b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12565b3e41ffSAmlan Barua #else
12575b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
12585b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
12595b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12605b3e41ffSAmlan Barua #endif
1261b2b77a04SHong Zhang       break;
1262b2b77a04SHong Zhang     case 3:
126311902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
126451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
126551d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
12666882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
12676882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
126851d1eed7SAmlan Barua #else
126951d1eed7SAmlan Barua       printf("The code comes here\n");
127051d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
127151d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
127251d1eed7SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
127351d1eed7SAmlan Barua #endif
1274b2b77a04SHong Zhang       break;
1275b2b77a04SHong Zhang     default:
1276b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
127711902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1278c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
127911902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
12806882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
128111902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
12826882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1283b3a17365SAmlan Barua #else
1284b3a17365SAmlan Barua       temp = pdim[ndim-1];
1285b3a17365SAmlan Barua       pdim[ndim-1]= temp/2 + 1;
1286b3a17365SAmlan Barua       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1287b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1288b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1289b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1290b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1291b3a17365SAmlan Barua       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1292b3a17365SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1293b3a17365SAmlan Barua #endif
1294b2b77a04SHong Zhang       break;
1295b2b77a04SHong Zhang     }
1296b2b77a04SHong Zhang   }
1297b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1298b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1299b2b77a04SHong Zhang   fft->data = (void*)fftw;
1300b2b77a04SHong Zhang 
1301b2b77a04SHong Zhang   fft->n           = n;
1302c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1303e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1304c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1305b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1306c92418ccSAmlan Barua 
1307b2b77a04SHong Zhang   fftw->p_forward  = 0;
1308b2b77a04SHong Zhang   fftw->p_backward = 0;
1309b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1310b2b77a04SHong Zhang 
1311b2b77a04SHong Zhang   if (size == 1){
1312b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1313b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1314b2b77a04SHong Zhang   } else {
1315b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1316b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1317b2b77a04SHong Zhang   }
1318b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
13196a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1320b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
1321b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
13223c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
13235b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1324b2b77a04SHong Zhang 
1325b2b77a04SHong Zhang   /* get runtime options */
1326b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1327b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1328b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1329b2b77a04SHong Zhang   PetscOptionsEnd();
1330b2b77a04SHong Zhang   PetscFunctionReturn(0);
1331b2b77a04SHong Zhang }
1332b2b77a04SHong Zhang EXTERN_C_END
13333c3a9c75SAmlan Barua 
13343c3a9c75SAmlan Barua 
13353c3a9c75SAmlan Barua 
13363c3a9c75SAmlan Barua 
1337