xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 57625b84b672f8833d348c39f6a308a642e6be51)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
284f7415efSAmlan Barua extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*);
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang #undef __FUNCT__
31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33b2b77a04SHong Zhang {
34b2b77a04SHong Zhang   PetscErrorCode ierr;
35b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
38b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39b2b77a04SHong Zhang 
40b2b77a04SHong Zhang   PetscFunctionBegin;
41b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
44b2b77a04SHong Zhang     switch (ndim){
45b2b77a04SHong Zhang     case 1:
4658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
47b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4858a912c5SAmlan Barua #else
4958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5058a912c5SAmlan Barua #endif
51b2b77a04SHong Zhang       break;
52b2b77a04SHong Zhang     case 2:
5358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
54b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5558a912c5SAmlan Barua #else
5658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5758a912c5SAmlan Barua #endif
58b2b77a04SHong Zhang       break;
59b2b77a04SHong Zhang     case 3:
6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
61b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6258a912c5SAmlan Barua #else
6358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6458a912c5SAmlan Barua #endif
65b2b77a04SHong Zhang       break;
66b2b77a04SHong Zhang     default:
6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
68b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6958a912c5SAmlan Barua #else
7058a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7158a912c5SAmlan Barua #endif
72b2b77a04SHong Zhang       break;
73b2b77a04SHong Zhang     }
74b2b77a04SHong Zhang     fftw->finarray  = x_array;
75b2b77a04SHong Zhang     fftw->foutarray = y_array;
76b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
77b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
78b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
79b2b77a04SHong Zhang   } else { /* use existing plan */
80b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
81b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
82b2b77a04SHong Zhang     } else {
83b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
84b2b77a04SHong Zhang     }
85b2b77a04SHong Zhang   }
86b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
88b2b77a04SHong Zhang   PetscFunctionReturn(0);
89b2b77a04SHong Zhang }
90b2b77a04SHong Zhang 
91b2b77a04SHong Zhang #undef __FUNCT__
92b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
93b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
94b2b77a04SHong Zhang {
95b2b77a04SHong Zhang   PetscErrorCode ierr;
96b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
97b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
98b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
99b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
100b2b77a04SHong Zhang 
101b2b77a04SHong Zhang   PetscFunctionBegin;
102b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
104b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
105b2b77a04SHong Zhang     switch (ndim){
106b2b77a04SHong Zhang     case 1:
10758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
108b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10958a912c5SAmlan Barua #else
110e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11158a912c5SAmlan Barua #endif
112b2b77a04SHong Zhang       break;
113b2b77a04SHong Zhang     case 2:
11458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
115b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11658a912c5SAmlan Barua #else
117e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11858a912c5SAmlan Barua #endif
119b2b77a04SHong Zhang       break;
120b2b77a04SHong Zhang     case 3:
12158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
122b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12358a912c5SAmlan Barua #else
124e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12558a912c5SAmlan Barua #endif
126b2b77a04SHong Zhang       break;
127b2b77a04SHong Zhang     default:
12858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
129b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13058a912c5SAmlan Barua #else
131e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13258a912c5SAmlan Barua #endif
133b2b77a04SHong Zhang       break;
134b2b77a04SHong Zhang     }
135b2b77a04SHong Zhang     fftw->binarray  = x_array;
136b2b77a04SHong Zhang     fftw->boutarray = y_array;
137b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
138b2b77a04SHong Zhang   } else { /* use existing plan */
139b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
140b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
141b2b77a04SHong Zhang     } else {
142b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
143b2b77a04SHong Zhang     }
144b2b77a04SHong Zhang   }
145b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
147b2b77a04SHong Zhang   PetscFunctionReturn(0);
148b2b77a04SHong Zhang }
149b2b77a04SHong Zhang 
150b2b77a04SHong Zhang #undef __FUNCT__
151b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
152b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
153b2b77a04SHong Zhang {
154b2b77a04SHong Zhang   PetscErrorCode ierr;
155b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
158c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
159b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
160b2b77a04SHong Zhang 
161b2b77a04SHong Zhang   PetscFunctionBegin;
162b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
165b2b77a04SHong Zhang     switch (ndim){
166b2b77a04SHong Zhang     case 1:
1673c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169ae0a50aaSHong Zhang #else
170ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1713c3a9c75SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     case 2:
1743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1763c3a9c75SAmlan Barua #else
1773c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1783c3a9c75SAmlan Barua #endif
179b2b77a04SHong Zhang       break;
180b2b77a04SHong Zhang     case 3:
1813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
182b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1833c3a9c75SAmlan Barua #else
18451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1853c3a9c75SAmlan Barua #endif
186b2b77a04SHong Zhang       break;
187b2b77a04SHong Zhang     default:
1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1903c3a9c75SAmlan Barua #else
1913c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1923c3a9c75SAmlan Barua #endif
193b2b77a04SHong Zhang       break;
194b2b77a04SHong Zhang     }
195b2b77a04SHong Zhang     fftw->finarray  = x_array;
196b2b77a04SHong Zhang     fftw->foutarray = y_array;
197b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
198b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
199b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
200b2b77a04SHong Zhang   } else { /* use existing plan */
201b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
202b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
203b2b77a04SHong Zhang     } else {
204b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
205b2b77a04SHong Zhang     }
206b2b77a04SHong Zhang   }
207b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
209b2b77a04SHong Zhang   PetscFunctionReturn(0);
210b2b77a04SHong Zhang }
211b2b77a04SHong Zhang 
212b2b77a04SHong Zhang #undef __FUNCT__
213b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
214b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
215b2b77a04SHong Zhang {
216b2b77a04SHong Zhang   PetscErrorCode ierr;
217b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
218b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
219b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
220c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
221b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
222b2b77a04SHong Zhang 
223b2b77a04SHong Zhang   PetscFunctionBegin;
224b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
226b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
227b2b77a04SHong Zhang     switch (ndim){
228b2b77a04SHong Zhang     case 1:
2293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
230b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
231ae0a50aaSHong Zhang #else
232ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2333c3a9c75SAmlan Barua #endif
234b2b77a04SHong Zhang       break;
235b2b77a04SHong Zhang     case 2:
2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
237b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2383c3a9c75SAmlan Barua #else
2393c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2403c3a9c75SAmlan Barua #endif
241b2b77a04SHong Zhang       break;
242b2b77a04SHong Zhang     case 3:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2453c3a9c75SAmlan Barua #else
2463c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     default:
2503c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
251c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2523c3a9c75SAmlan Barua #else
2533c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     }
257b2b77a04SHong Zhang     fftw->binarray  = x_array;
258b2b77a04SHong Zhang     fftw->boutarray = y_array;
259b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
260b2b77a04SHong Zhang   } else { /* use existing plan */
261b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
262b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
263b2b77a04SHong Zhang     } else {
264b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265b2b77a04SHong Zhang     }
266b2b77a04SHong Zhang   }
267b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
269b2b77a04SHong Zhang   PetscFunctionReturn(0);
270b2b77a04SHong Zhang }
271b2b77a04SHong Zhang 
272b2b77a04SHong Zhang #undef __FUNCT__
273b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
274b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
275b2b77a04SHong Zhang {
276b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
277b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
278b2b77a04SHong Zhang   PetscErrorCode ierr;
279b2b77a04SHong Zhang 
280b2b77a04SHong Zhang   PetscFunctionBegin;
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
282b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
283b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
284bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
285b2b77a04SHong Zhang   PetscFunctionReturn(0);
286b2b77a04SHong Zhang }
287b2b77a04SHong Zhang 
288c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
289b2b77a04SHong Zhang #undef __FUNCT__
290b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
291b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
292b2b77a04SHong Zhang {
293b2b77a04SHong Zhang   PetscErrorCode  ierr;
294b2b77a04SHong Zhang   PetscScalar     *array;
295b2b77a04SHong Zhang 
296b2b77a04SHong Zhang   PetscFunctionBegin;
297b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
300b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
301b2b77a04SHong Zhang   PetscFunctionReturn(0);
302b2b77a04SHong Zhang }
303b2b77a04SHong Zhang 
304b2b77a04SHong Zhang #undef __FUNCT__
3053c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
306c92418ccSAmlan Barua /*
307c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
308c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
309c92418ccSAmlan Barua 
310c92418ccSAmlan Barua */
3116a506ed8SAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
312c92418ccSAmlan Barua {
313c92418ccSAmlan Barua   PetscErrorCode ierr;
314c92418ccSAmlan Barua   PetscMPIInt    size,rank;
315c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
316c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
317c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318c92418ccSAmlan Barua   PetscInt       N=fft->N;
319c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
320c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
321c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
322c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
323c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
324ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
325c92418ccSAmlan Barua 
326c92418ccSAmlan Barua   PetscFunctionBegin;
327c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
328c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
329c92418ccSAmlan Barua #endif
330c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
331c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
332c92418ccSAmlan Barua   if (size == 1){
333c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
334c92418ccSAmlan Barua   }
335c92418ccSAmlan Barua   else {
336c92418ccSAmlan Barua       if (ndim>1){
337c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
338c92418ccSAmlan Barua       else {
339c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
340c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
341c92418ccSAmlan Barua           if (fin) {
342c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
343c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
344c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
345c92418ccSAmlan Barua           }
346c92418ccSAmlan Barua           if (fout) {
347c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
348c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
349c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
350c92418ccSAmlan Barua           }
351c92418ccSAmlan Barua           if (bout) {
352c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
353c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
354c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
355c92418ccSAmlan Barua           }
356c92418ccSAmlan Barua   }
357c92418ccSAmlan Barua   if (fin){
358c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
359c92418ccSAmlan Barua   }
360c92418ccSAmlan Barua   if (fout){
361c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
362c92418ccSAmlan Barua   }
363c92418ccSAmlan Barua   if (bout){
364c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
365c92418ccSAmlan Barua   }
366c92418ccSAmlan Barua   PetscFunctionReturn(0);
367c92418ccSAmlan Barua }
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua 
370c92418ccSAmlan Barua }
3713c3a9c75SAmlan Barua 
3724f7415efSAmlan Barua 
3734f7415efSAmlan Barua #undef __FUNCT__
3744f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT"
3754f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z)
3764f7415efSAmlan Barua {
3774f7415efSAmlan Barua   PetscErrorCode ierr;
3784f7415efSAmlan Barua   PetscFunctionBegin;
3794f7415efSAmlan Barua   ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3804f7415efSAmlan Barua   PetscFunctionReturn(0);
3814f7415efSAmlan Barua }
3824f7415efSAmlan Barua 
3834f7415efSAmlan Barua #undef __FUNCT__
3844f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT_FFTW"
3854f7415efSAmlan Barua /*
3864f7415efSAmlan Barua    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
3874f7415efSAmlan Barua      parallel layout determined by FFTW
3884f7415efSAmlan Barua 
3894f7415efSAmlan Barua    Collective on Mat
3904f7415efSAmlan Barua 
3914f7415efSAmlan Barua    Input Parameter:
3924f7415efSAmlan Barua .  mat - the matrix
3934f7415efSAmlan Barua 
3944f7415efSAmlan Barua    Output Parameter:
3954f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3964f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3974f7415efSAmlan Barua 
3984f7415efSAmlan Barua   Level: advanced
3994f7415efSAmlan Barua 
4004f7415efSAmlan Barua .seealso: MatCreateFFTW()
4014f7415efSAmlan Barua */
4024f7415efSAmlan Barua EXTERN_C_BEGIN
4034f7415efSAmlan Barua PetscErrorCode  MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4044f7415efSAmlan Barua {
4054f7415efSAmlan Barua   PetscErrorCode ierr;
4064f7415efSAmlan Barua   PetscMPIInt    size,rank;
4074f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
4084f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
4094f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4104f7415efSAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
4114f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
4124f7415efSAmlan Barua 
4134f7415efSAmlan Barua   PetscFunctionBegin;
4144f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4154f7415efSAmlan Barua   PetscValidType(A,1);
4164f7415efSAmlan Barua 
4174f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
4184f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4194f7415efSAmlan Barua   printf("size is %d\n",size);
4204f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4214f7415efSAmlan Barua   printf("Routine is getting called\n");
4224f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4234f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4244f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4254f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4264f7415efSAmlan Barua #else
4274f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
4284f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
4294f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
4304f7415efSAmlan Barua #endif
4314f7415efSAmlan Barua   } else {
4324f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4334f7415efSAmlan Barua     ptrdiff_t      local_n1,local_1_end;
4340c9b18e4SAmlan Barua     fftw_complex   *data_fin,*data_fout,*data_bout;
4354f7415efSAmlan Barua     double         *data_finr,*data_boutr ;
4364f7415efSAmlan Barua     ptrdiff_t      local_1_start,temp;
4374f7415efSAmlan Barua     switch (ndim){
4384f7415efSAmlan Barua           case 1:
439*57625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
440*57625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
441*57625b84SAmlan Barua #else
4424f7415efSAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
443*57625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
444*57625b84SAmlan Barua                  if (fin) {
445*57625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
446*57625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
447*57625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
448*57625b84SAmlan Barua                          }
449*57625b84SAmlan Barua                  if (fout) {
450*57625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
451*57625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
452*57625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
453*57625b84SAmlan Barua                          }
454*57625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
455*57625b84SAmlan Barua                  if (bout) {
456*57625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
457*57625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
458*57625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
459*57625b84SAmlan Barua                          }
460*57625b84SAmlan Barua                  break;
461*57625b84SAmlan Barua 
462*57625b84SAmlan Barua 
463*57625b84SAmlan Barua #endif
4644f7415efSAmlan Barua           case 2:
4654f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4664f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4674f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4684f7415efSAmlan Barua                  if (fin) {
4694f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4704f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4714f7415efSAmlan Barua                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
4724f7415efSAmlan Barua                            //printf("The code comes here with vector size %d\n",vsize);
4734f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4744f7415efSAmlan Barua                           }
4754f7415efSAmlan Barua                  if (bout) {
4764f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4774f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4784f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4794f7415efSAmlan Barua                            //printf("The code comes here with vector size %d\n",vsize);
4804f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4814f7415efSAmlan Barua                           }
482*57625b84SAmlan Barua                  if (fout) {
483*57625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
484*57625b84SAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
485*57625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
486*57625b84SAmlan Barua                            }
4874f7415efSAmlan Barua 
4884f7415efSAmlan Barua                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
4894f7415efSAmlan Barua 
4904f7415efSAmlan Barua #else
4914f7415efSAmlan Barua       /* Get local size */
4920c9b18e4SAmlan Barua                  printf("Code works for paralllel 2d complex DFT\n");
4934f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4944f7415efSAmlan Barua                  if (fin) {
4954f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4964f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4974f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4984f7415efSAmlan Barua                           }
4994f7415efSAmlan Barua                  if (fout) {
5004f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5014f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5024f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5034f7415efSAmlan Barua                            }
5044f7415efSAmlan Barua                  if (bout) {
5054f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5064f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5074f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5084f7415efSAmlan Barua                           }
5094f7415efSAmlan Barua 
5104f7415efSAmlan Barua      //printf("Hope this does not come here");
5114f7415efSAmlan Barua #endif
5124f7415efSAmlan Barua       break;
5134f7415efSAmlan Barua 
5144f7415efSAmlan Barua           case 3:
5154f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5164f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5174f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5184f7415efSAmlan Barua                  if (fin) {
5194f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5204f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5214f7415efSAmlan Barua                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
5224f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
5234f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5244f7415efSAmlan Barua                          }
5254f7415efSAmlan Barua                  if (bout) {
5264f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5274f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5284f7415efSAmlan Barua                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
5294f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
5304f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5314f7415efSAmlan Barua                           }
532*57625b84SAmlan Barua                  if (fout) {
533*57625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
534*57625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
535*57625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
536*57625b84SAmlan Barua                           }
5374f7415efSAmlan Barua 
5384f7415efSAmlan Barua       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
5394f7415efSAmlan Barua 
5404f7415efSAmlan Barua #else
5410c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5420c9b18e4SAmlan Barua //      printf("The quantity n is %d",n);
5430c9b18e4SAmlan Barua                  if (fin) {
5440c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5450c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5460c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5470c9b18e4SAmlan Barua                          }
5480c9b18e4SAmlan Barua                  if (fout) {
5490c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5500c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5510c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5520c9b18e4SAmlan Barua                          }
5530c9b18e4SAmlan Barua                  if (bout) {
5540c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5550c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5560c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5570c9b18e4SAmlan Barua                          }
5580c9b18e4SAmlan Barua 
5594f7415efSAmlan Barua #endif
5604f7415efSAmlan Barua                  break;
5614f7415efSAmlan Barua           default:
5624f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5634f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5644f7415efSAmlan Barua                  printf("The value of temp is %ld\n",temp);
5654f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5664f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5674f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5684f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5694f7415efSAmlan Barua 
5704f7415efSAmlan Barua                  if (fin) {
5714f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5724f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5734f7415efSAmlan Barua                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
5744f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
5754f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5764f7415efSAmlan Barua                         }
5774f7415efSAmlan Barua                  if (bout) {
5784f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5794f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5804f7415efSAmlan Barua                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
5814f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
5824f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5834f7415efSAmlan Barua                         }
584*57625b84SAmlan Barua                  if (fout) {
585*57625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586*57625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
587*57625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
588*57625b84SAmlan Barua                         }
5894f7415efSAmlan Barua 
5904f7415efSAmlan Barua #else
5910c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5920c9b18e4SAmlan Barua                 if (fin) {
5930c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5940c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5950c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5960c9b18e4SAmlan Barua                        }
5970c9b18e4SAmlan Barua                 if (fout) {
5980c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5990c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6000c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6010c9b18e4SAmlan Barua                        }
6020c9b18e4SAmlan Barua                 if (bout) {
6030c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6040c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
6050c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6060c9b18e4SAmlan Barua                        }
6074f7415efSAmlan Barua 
6080c9b18e4SAmlan Barua 
6090c9b18e4SAmlan Barua 
6104f7415efSAmlan Barua #endif
6114f7415efSAmlan Barua             break;
6124f7415efSAmlan Barua           }
6134f7415efSAmlan Barua   }
6144f7415efSAmlan Barua   PetscFunctionReturn(0);
6154f7415efSAmlan Barua }
6164f7415efSAmlan Barua EXTERN_C_END
6174f7415efSAmlan Barua 
6184f7415efSAmlan Barua 
6194f7415efSAmlan Barua 
620c92418ccSAmlan Barua #undef __FUNCT__
621b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
622b2b77a04SHong Zhang /*
623b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
624b2b77a04SHong Zhang      parallel layout determined by FFTW
625b2b77a04SHong Zhang 
626b2b77a04SHong Zhang    Collective on Mat
627b2b77a04SHong Zhang 
628b2b77a04SHong Zhang    Input Parameter:
629b2b77a04SHong Zhang .  mat - the matrix
630b2b77a04SHong Zhang 
631b2b77a04SHong Zhang    Output Parameter:
632b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
633b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
634b2b77a04SHong Zhang 
635b2b77a04SHong Zhang   Level: advanced
636b2b77a04SHong Zhang 
637b2b77a04SHong Zhang .seealso: MatCreateFFTW()
638b2b77a04SHong Zhang */
639b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
640b2b77a04SHong Zhang {
641b2b77a04SHong Zhang   PetscErrorCode ierr;
642b2b77a04SHong Zhang   PetscMPIInt    size,rank;
643b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
644b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
645c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6463c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
647e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
648b2b77a04SHong Zhang 
649b2b77a04SHong Zhang   PetscFunctionBegin;
650b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
651b2b77a04SHong Zhang   PetscValidType(A,1);
652b2b77a04SHong Zhang 
653b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
654b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
655b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
656e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
657b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
658b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
659e5d7f247SAmlan Barua #else
660e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
661e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
662e81bb599SAmlan Barua #endif
663b2b77a04SHong Zhang   } else {        /* mpi case */
664b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6656882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
666b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
66751d1eed7SAmlan Barua     double         *data_finr ;
668b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
669c92418ccSAmlan Barua //    PetscInt ctr;
670c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
671c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
672c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
67311902ff2SHong Zhang 
674c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
675f76f76beSAmlan Barua //        {k
676c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
677c92418ccSAmlan Barua //       }
678b2b77a04SHong Zhang 
679f76f76beSAmlan Barua 
680f76f76beSAmlan Barua 
681b2b77a04SHong Zhang     switch (ndim){
682b2b77a04SHong Zhang     case 1:
6836882372aSHong Zhang       /* Get local size */
684e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
685c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6866882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
687*57625b84SAmlan Barua       printf("The values of local_n0 and local_n1 are %d and %d",local_n0,local_n1);
6886882372aSHong Zhang       if (fin) {
6896882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6906882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6916882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6926882372aSHong Zhang       }
6936882372aSHong Zhang       if (fout) {
6946882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
695*57625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6966882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6976882372aSHong Zhang       }
698b2b77a04SHong Zhang       break;
699b2b77a04SHong Zhang     case 2:
7003c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7013c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
7023c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7033c3a9c75SAmlan Barua       if (fin) {
7043c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
70554dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
7063c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
70709dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
7083c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7093c3a9c75SAmlan Barua       }
710*57625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
711*57625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
712*57625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
7133c3a9c75SAmlan Barua       if (fout) {
71409dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
71509dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7163c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7173c3a9c75SAmlan Barua       }
718f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
7193c3a9c75SAmlan Barua 
7203c3a9c75SAmlan Barua #else
721b2b77a04SHong Zhang       /* Get local size */
72264657d84SAmlan Barua      //printf("Hope this does not come here");
723b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
724b2b77a04SHong Zhang       if (fin) {
725b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
726b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
727b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
728b2b77a04SHong Zhang       }
729b2b77a04SHong Zhang       if (fout) {
730b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
731b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
732b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
733b2b77a04SHong Zhang       }
73464657d84SAmlan Barua      //printf("Hope this does not come here");
7353c3a9c75SAmlan Barua #endif
736b2b77a04SHong Zhang       break;
737b2b77a04SHong Zhang     case 3:
7386882372aSHong Zhang       /* Get local size */
7393c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
74051d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
74151d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
74251d1eed7SAmlan Barua       if (fin) {
74351d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
74451d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
74551d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
74651d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
74751d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
74851d1eed7SAmlan Barua       }
749*57625b84SAmlan Barua       printf("The value is %ld",local_n1);
750*57625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
75151d1eed7SAmlan Barua       if (fout) {
75251d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
75351d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
75451d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
75551d1eed7SAmlan Barua       }
75651d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
75751d1eed7SAmlan Barua 
75851d1eed7SAmlan Barua 
7593c3a9c75SAmlan Barua #else
7606882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
76111902ff2SHong Zhang //      printf("The quantity n is %d",n);
7626882372aSHong Zhang       if (fin) {
7636882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7646882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7656882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7666882372aSHong Zhang       }
7676882372aSHong Zhang       if (fout) {
7686882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7696882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7706882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7716882372aSHong Zhang       }
7723c3a9c75SAmlan Barua #endif
773b2b77a04SHong Zhang       break;
774b2b77a04SHong Zhang     default:
7756882372aSHong Zhang       /* Get local size */
7763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
777b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
778b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
779b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
780b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
781b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
782b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
783b3a17365SAmlan Barua       if (fin) {
784b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
785b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
786b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
787b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
788b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
789b3a17365SAmlan Barua       }
790*57625b84SAmlan Barua       printf("The value is %ld. Global length is %d \n",local_n1,N1);
791*57625b84SAmlan Barua       temp = fftw->partial_dim;
792*57625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
793*57625b84SAmlan Barua 
794*57625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
795b3a17365SAmlan Barua       if (fout) {
796b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
797*57625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
798b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
799b3a17365SAmlan Barua       }
800b3a17365SAmlan Barua 
8013c3a9c75SAmlan Barua #else
802c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
80311902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
80411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
80511902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
80611902ff2SHong Zhang //      for(i=0;i<ndim;i++)
80711902ff2SHong Zhang //         {
80811902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
80911902ff2SHong Zhang //         }
81011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
81111902ff2SHong Zhang //      printf("The quantity n is %d",n);
8126882372aSHong Zhang       if (fin) {
8136882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
8146882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
8156882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
8166882372aSHong Zhang       }
8176882372aSHong Zhang       if (fout) {
8186882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
8196882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
8206882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
8216882372aSHong Zhang       }
8223c3a9c75SAmlan Barua #endif
823b2b77a04SHong Zhang       break;
824b2b77a04SHong Zhang     }
825b2b77a04SHong Zhang   }
82654dd5118SAmlan Barua //  if (fin){
82754dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
82854dd5118SAmlan Barua //  }
82954dd5118SAmlan Barua //  if (fout){
83054dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
83154dd5118SAmlan Barua //  }
832b2b77a04SHong Zhang   PetscFunctionReturn(0);
833b2b77a04SHong Zhang }
834b2b77a04SHong Zhang 
835b2b77a04SHong Zhang #undef __FUNCT__
8363c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
8373c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
8383c3a9c75SAmlan Barua {
8393c3a9c75SAmlan Barua   PetscErrorCode ierr;
8403c3a9c75SAmlan Barua   PetscFunctionBegin;
8413c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8423c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8433c3a9c75SAmlan Barua }
84454efbe56SHong Zhang 
845b2b77a04SHong Zhang /*
8463c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
8473c3a9c75SAmlan Barua   Input A, x, y
8483c3a9c75SAmlan Barua   A - FFTW matrix
84954dd5118SAmlan Barua   x - user data
850b2b77a04SHong Zhang   Options Database Keys:
851b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
852b2b77a04SHong Zhang 
853b2b77a04SHong Zhang    Level: intermediate
854b2b77a04SHong Zhang 
855b2b77a04SHong Zhang */
8563c3a9c75SAmlan Barua 
8573c3a9c75SAmlan Barua EXTERN_C_BEGIN
8583c3a9c75SAmlan Barua #undef __FUNCT__
8593c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
8603c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
8613c3a9c75SAmlan Barua {
8623c3a9c75SAmlan Barua   PetscErrorCode ierr;
8633c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8643c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8653c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8663c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
867b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
868b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
869e5d7f247SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
8703c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
871e5d7f247SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
8723c3a9c75SAmlan Barua   VecScatter     vecscat;
8733c3a9c75SAmlan Barua   IS             list1,list2;
8743c3a9c75SAmlan Barua 
875f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
876f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8773c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
878f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
8793c3a9c75SAmlan Barua 
880e81bb599SAmlan Barua   if (size==1)
881e81bb599SAmlan Barua     {
8827536937bSAmlan Barua /*     switch (ndim){
883e81bb599SAmlan Barua      case 1:
884e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
885e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
886e81bb599SAmlan Barua              {
887e81bb599SAmlan Barua               indx1[i] = i;
888e81bb599SAmlan Barua              }
889e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
890e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
891e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
894b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
895b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
896e81bb599SAmlan Barua      break;
897e81bb599SAmlan Barua 
898e81bb599SAmlan Barua      case 2:
899e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
900e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
901e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
902e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
903e81bb599SAmlan Barua              }
904e81bb599SAmlan Barua           }
905e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
906e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
907e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
910b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
911b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
912e81bb599SAmlan Barua           break;
913e81bb599SAmlan Barua      case 3:
914e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
915e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
916e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
917e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
918e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
919e81bb599SAmlan Barua                 }
920e81bb599SAmlan Barua              }
921e81bb599SAmlan Barua           }
922e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
923e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
924e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
927b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
928b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
929e81bb599SAmlan Barua           break;
930e81bb599SAmlan Barua      default:
9317536937bSAmlan Barua */
9326971673cSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
9336971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9346971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9356971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9366971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
937b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
9386971673cSAmlan Barua           //ierr = ISDestroy(list1);CHKERRQ(ierr);
9397536937bSAmlan Barua  //         break;
9407536937bSAmlan Barua   //    }
941e81bb599SAmlan Barua     }
942e81bb599SAmlan Barua 
943e81bb599SAmlan Barua  else{
944e81bb599SAmlan Barua 
9453c3a9c75SAmlan Barua  switch (ndim){
9463c3a9c75SAmlan Barua  case 1:
94764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
94864657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
94964657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
95064657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
95164657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
95264657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
95364657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
95464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
95764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
95864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
95964657d84SAmlan Barua       break;
96064657d84SAmlan Barua #else
961e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
96264657d84SAmlan Barua #endif
9633c3a9c75SAmlan Barua   break;
9643c3a9c75SAmlan Barua  case 2:
965bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
966bd59e6c6SAmlan Barua       //PetscInt my_value;
967bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
968bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
969bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
970bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
971bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
972bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
973bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
976bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
977bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
978bd59e6c6SAmlan Barua       break;
979bd59e6c6SAmlan Barua #else
9803c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9813c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9823c3a9c75SAmlan Barua 
9835b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9845b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9855b3e41ffSAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
9863c3a9c75SAmlan Barua 
9873c3a9c75SAmlan Barua       if (dim[1]%2==0)
9883c3a9c75SAmlan Barua         NM = dim[1]+2;
9893c3a9c75SAmlan Barua       else
9903c3a9c75SAmlan Barua         NM = dim[1]+1;
9913c3a9c75SAmlan Barua 
9923c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9933c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9943c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9953c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9965b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9973c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9985b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
9995b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
10005b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
10015b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
10023c3a9c75SAmlan Barua         }
10033c3a9c75SAmlan Barua      }
10043c3a9c75SAmlan Barua 
10053c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10063c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10073c3a9c75SAmlan Barua 
1008f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1009f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1012b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1013b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1014b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1015b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
10163c3a9c75SAmlan Barua       break;
1017bd59e6c6SAmlan Barua #endif
10183c3a9c75SAmlan Barua 
10193c3a9c75SAmlan Barua  case 3:
1020bd59e6c6SAmlan Barua       #if defined(PETSC_USE_COMPLEX)
1021bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1022bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1023bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1024bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1025bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1026bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1031bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1032bd59e6c6SAmlan Barua       break;
1033bd59e6c6SAmlan Barua #else
103451d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
103551d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
103651d1eed7SAmlan Barua 
103751d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
103851d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
103951d1eed7SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
104051d1eed7SAmlan Barua 
104151d1eed7SAmlan Barua       if (dim[2]%2==0)
104251d1eed7SAmlan Barua         NM = dim[2]+2;
104351d1eed7SAmlan Barua       else
104451d1eed7SAmlan Barua         NM = dim[2]+1;
104551d1eed7SAmlan Barua 
104651d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
104751d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
104851d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
104951d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
105051d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
105151d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
105251d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
105351d1eed7SAmlan Barua             }
105451d1eed7SAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
105551d1eed7SAmlan Barua            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
105651d1eed7SAmlan Barua            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
105751d1eed7SAmlan Barua            // printf("-------------------------\n",indx2[tempindx],rank);
105851d1eed7SAmlan Barua         }
105951d1eed7SAmlan Barua      }
106051d1eed7SAmlan Barua 
106151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
106251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
106351d1eed7SAmlan Barua 
106451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
106551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
106651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
106751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1068b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1069b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1070b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1071b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
10723c3a9c75SAmlan Barua       break;
1073bd59e6c6SAmlan Barua #endif
10743c3a9c75SAmlan Barua 
10753c3a9c75SAmlan Barua  default:
1076bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1077bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1078bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1079bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1080bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1081bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1082bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1083bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1086bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1087bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1088bd59e6c6SAmlan Barua 
1089bd59e6c6SAmlan Barua 
1090bd59e6c6SAmlan Barua #else
1091e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1092e5d7f247SAmlan Barua       printf("The value of temp is %ld\n",temp);
1093e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1094e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1095e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1096e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1097e5d7f247SAmlan Barua 
1098e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1099e5d7f247SAmlan Barua       printf("The value of partial dim is %d\n",partial_dim);
1100e5d7f247SAmlan Barua 
1101e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1102e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1103e5d7f247SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
1104e5d7f247SAmlan Barua 
1105e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1106ba6e06d0SAmlan Barua         NM = 2;
1107e5d7f247SAmlan Barua       else
1108ba6e06d0SAmlan Barua         NM = 1;
1109e5d7f247SAmlan Barua 
11106971673cSAmlan Barua       j = low;
11116971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
11126971673cSAmlan Barua          {
11136971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
11146971673cSAmlan Barua           indx2[i] = j;
1115ba6e06d0SAmlan Barua           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
11166971673cSAmlan Barua           if (k%dim[ndim-1]==0)
11176971673cSAmlan Barua             { j+=NM;}
11186971673cSAmlan Barua           j++;
11196971673cSAmlan Barua          }
11206971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
11216971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
11226971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
11236971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11246971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11256971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1126b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1127b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1128b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1129b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
11303c3a9c75SAmlan Barua       break;
1131bd59e6c6SAmlan Barua #endif
11323c3a9c75SAmlan Barua   }
1133e81bb599SAmlan Barua  }
11343c3a9c75SAmlan Barua 
11353c3a9c75SAmlan Barua  return 0;
11363c3a9c75SAmlan Barua }
11373c3a9c75SAmlan Barua EXTERN_C_END
11383c3a9c75SAmlan Barua 
11393c3a9c75SAmlan Barua #undef __FUNCT__
11403c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
11413c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
11423c3a9c75SAmlan Barua {
11433c3a9c75SAmlan Barua   PetscErrorCode ierr;
11443c3a9c75SAmlan Barua   PetscFunctionBegin;
11453c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
11463c3a9c75SAmlan Barua   PetscFunctionReturn(0);
11473c3a9c75SAmlan Barua }
114854efbe56SHong Zhang 
11495b3e41ffSAmlan Barua /*
11505b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
11515b3e41ffSAmlan Barua   Input A, x, y
11525b3e41ffSAmlan Barua   A - FFTW matrix
11535b3e41ffSAmlan Barua   x - FFTW vector
11545b3e41ffSAmlan Barua   y - PETSc vector that user can read
11555b3e41ffSAmlan Barua   Options Database Keys:
11565b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11575b3e41ffSAmlan Barua 
11585b3e41ffSAmlan Barua    Level: intermediate
11595b3e41ffSAmlan Barua 
11603c3a9c75SAmlan Barua */
11613c3a9c75SAmlan Barua 
11623c3a9c75SAmlan Barua EXTERN_C_BEGIN
11633c3a9c75SAmlan Barua #undef __FUNCT__
11645b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
11655b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
11665b3e41ffSAmlan Barua {
11675b3e41ffSAmlan Barua   PetscErrorCode ierr;
11685b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
11695b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
11705b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
11715b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
1172b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1173b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1174ba6e06d0SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
11755b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
1176ba6e06d0SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
11775b3e41ffSAmlan Barua   VecScatter     vecscat;
11785b3e41ffSAmlan Barua   IS             list1,list2;
11795b3e41ffSAmlan Barua 
11805b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
11815b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1182cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
11835b3e41ffSAmlan Barua 
1184e81bb599SAmlan Barua   if (size==1){
11857536937bSAmlan Barua /*
11865b3e41ffSAmlan Barua     switch (ndim){
11875b3e41ffSAmlan Barua     case 1:
1188e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1189e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1190e81bb599SAmlan Barua              {
1191e81bb599SAmlan Barua               indx1[i] = i;
1192e81bb599SAmlan Barua              }
1193e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1194e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1195e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1196e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1197e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1198b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1199b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1200e81bb599SAmlan Barua           break;
1201e81bb599SAmlan Barua 
1202e81bb599SAmlan Barua     case 2:
1203e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1204e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1205e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1206e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1207e81bb599SAmlan Barua              }
1208e81bb599SAmlan Barua           }
1209e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1210e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1211e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1214b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1215b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1216e81bb599SAmlan Barua          break;
1217e81bb599SAmlan Barua     case 3:
1218e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1219e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1220e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1221e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1222e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1223e81bb599SAmlan Barua                }
1224e81bb599SAmlan Barua             }
1225e81bb599SAmlan Barua          }
1226e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1227e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1228e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1231b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1232b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1233e81bb599SAmlan Barua          break;
1234e81bb599SAmlan Barua     default:
12357536937bSAmlan Barua */
12366971673cSAmlan Barua          ierr = ISCreateStride(comm,N,0,1,&list1);
12376971673cSAmlan Barua          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
12386971673cSAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
12396971673cSAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12406971673cSAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12416971673cSAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1242b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
12437536937bSAmlan Barua   //       break;
12447536937bSAmlan Barua    // }
1245e81bb599SAmlan Barua   }
1246e81bb599SAmlan Barua   else{
1247e81bb599SAmlan Barua 
1248e81bb599SAmlan Barua  switch (ndim){
1249e81bb599SAmlan Barua  case 1:
125064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
125164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
125264657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
125364657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
125464657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
125564657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
125664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
125764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
125864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
125964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
126064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
126164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
126264657d84SAmlan Barua       break;
126364657d84SAmlan Barua 
126464657d84SAmlan Barua #else
12656a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
126664657d84SAmlan Barua #endif
12675b3e41ffSAmlan Barua   break;
12685b3e41ffSAmlan Barua  case 2:
1269bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1270bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1271bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1272bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1273bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1274bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1275bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1277bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1278bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1279bd59e6c6SAmlan Barua       break;
1280bd59e6c6SAmlan Barua #else
12815b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
12825b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
12835b3e41ffSAmlan Barua 
12845b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
12855b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
12865b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
12875b3e41ffSAmlan Barua 
12885b3e41ffSAmlan Barua      if (dim[1]%2==0)
12895b3e41ffSAmlan Barua       NM = dim[1]+2;
12905b3e41ffSAmlan Barua     else
12915b3e41ffSAmlan Barua       NM = dim[1]+1;
12925b3e41ffSAmlan Barua 
12935b3e41ffSAmlan Barua 
12945b3e41ffSAmlan Barua 
12955b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
12965b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
12975b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12985b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12995b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
13005b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
1301cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
1302cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1303cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1304cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
13055b3e41ffSAmlan Barua         }
13065b3e41ffSAmlan Barua      }
13075b3e41ffSAmlan Barua 
13085b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
13095b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
13105b3e41ffSAmlan Barua 
13115b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
13125b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
13135b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
13145b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1315b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1316b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1317b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1318b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
13195b3e41ffSAmlan Barua   break;
1320bd59e6c6SAmlan Barua #endif
13215b3e41ffSAmlan Barua  case 3:
1322bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1323bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1324bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1325bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1326bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1327bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1328bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1329bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1331bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1332bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1333bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1334bd59e6c6SAmlan Barua       break;
1335bd59e6c6SAmlan Barua #else
1336bd59e6c6SAmlan Barua 
133751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
133851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
133951d1eed7SAmlan Barua 
134051d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
134151d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
134251d1eed7SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
134351d1eed7SAmlan Barua 
134451d1eed7SAmlan Barua      if (dim[2]%2==0)
134551d1eed7SAmlan Barua       NM = dim[2]+2;
134651d1eed7SAmlan Barua     else
134751d1eed7SAmlan Barua       NM = dim[2]+1;
134851d1eed7SAmlan Barua 
134951d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
135051d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
135151d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
135251d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
135351d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
135451d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
135551d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
135651d1eed7SAmlan Barua            }
135751d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
135851d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
135951d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
136051d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
136151d1eed7SAmlan Barua         }
136251d1eed7SAmlan Barua      }
136351d1eed7SAmlan Barua 
136451d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
136551d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
136651d1eed7SAmlan Barua 
136751d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
136851d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
136951d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
137051d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1371b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1372b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1373b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1374b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
13755b3e41ffSAmlan Barua   break;
1376bd59e6c6SAmlan Barua #endif
13775b3e41ffSAmlan Barua   default:
1378bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1379bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1380bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1381bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1382bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1383bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1384bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1385bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1386bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1387bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1388bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1389bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1390bd59e6c6SAmlan Barua #else
1391ba6e06d0SAmlan Barua      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1392ba6e06d0SAmlan Barua      printf("The value of temp is %ld\n",temp);
1393ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1394ba6e06d0SAmlan Barua      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1395ba6e06d0SAmlan Barua      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1396ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1397ba6e06d0SAmlan Barua 
1398ba6e06d0SAmlan Barua      partial_dim = fftw->partial_dim;
1399ba6e06d0SAmlan Barua      printf("The value of partial dim is %d\n",partial_dim);
1400ba6e06d0SAmlan Barua 
1401ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1402ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1403ba6e06d0SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
1404ba6e06d0SAmlan Barua 
1405ba6e06d0SAmlan Barua      if (dim[ndim-1]%2==0)
1406ba6e06d0SAmlan Barua        NM = 2;
1407ba6e06d0SAmlan Barua      else
1408ba6e06d0SAmlan Barua        NM = 1;
1409ba6e06d0SAmlan Barua 
1410ba6e06d0SAmlan Barua      j = low;
1411ba6e06d0SAmlan Barua      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1412ba6e06d0SAmlan Barua         {
1413ba6e06d0SAmlan Barua          indx1[i] = local_0_start*partial_dim + i;
1414ba6e06d0SAmlan Barua          indx2[i] = j;
1415ba6e06d0SAmlan Barua          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1416ba6e06d0SAmlan Barua          if (k%dim[ndim-1]==0)
1417ba6e06d0SAmlan Barua            { j+=NM;}
1418ba6e06d0SAmlan Barua          j++;
1419ba6e06d0SAmlan Barua         }
1420ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1421ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1422ba6e06d0SAmlan Barua 
1423ba6e06d0SAmlan Barua       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1424ba6e06d0SAmlan Barua 
1425ba6e06d0SAmlan Barua 
1426ba6e06d0SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1427ba6e06d0SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1428ba6e06d0SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1429ba6e06d0SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1430b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1431b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1432b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1433b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1434ba6e06d0SAmlan Barua 
14355b3e41ffSAmlan Barua      break;
1436bd59e6c6SAmlan Barua #endif
14375b3e41ffSAmlan Barua  }
1438e81bb599SAmlan Barua  }
14395b3e41ffSAmlan Barua  return 0;
14405b3e41ffSAmlan Barua }
14415b3e41ffSAmlan Barua EXTERN_C_END
14425b3e41ffSAmlan Barua 
14435b3e41ffSAmlan Barua EXTERN_C_BEGIN
14445b3e41ffSAmlan Barua #undef __FUNCT__
14453c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
14463c3a9c75SAmlan Barua /*
14473c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
14483c3a9c75SAmlan Barua   via the external package FFTW
14493c3a9c75SAmlan Barua   Options Database Keys:
14503c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
14513c3a9c75SAmlan Barua 
14523c3a9c75SAmlan Barua    Level: intermediate
14533c3a9c75SAmlan Barua 
14543c3a9c75SAmlan Barua */
14553c3a9c75SAmlan Barua 
1456b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1457b2b77a04SHong Zhang {
1458b2b77a04SHong Zhang   PetscErrorCode ierr;
1459b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1460b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1461b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1462b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1463b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1464b2b77a04SHong Zhang   PetscBool      flg;
1465b3a17365SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,N1;
146611902ff2SHong Zhang   PetscMPIInt    size,rank;
1467b3a17365SAmlan Barua   ptrdiff_t      *pdim, temp;
14684a16a297SSean Farley   ptrdiff_t      local_n1,local_1_start,local_1_end;
1469b2b77a04SHong Zhang 
1470b2b77a04SHong Zhang   PetscFunctionBegin;
1471b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
147211902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
147354dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1474c92418ccSAmlan Barua 
147511902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
147611902ff2SHong Zhang   pdim[0] = dim[0];
147711902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
147811902ff2SHong Zhang       {
14796882372aSHong Zhang           partial_dim *= dim[ctr];
148011902ff2SHong Zhang           pdim[ctr] = dim[ctr];
14816882372aSHong Zhang       }
14823c3a9c75SAmlan Barua 
1483b2b77a04SHong Zhang   if (size == 1) {
1484e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1485b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1486b2b77a04SHong Zhang     n = N;
1487e81bb599SAmlan Barua #else
1488e81bb599SAmlan Barua     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1489e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1490e81bb599SAmlan Barua     n = tot_dim;
1491e81bb599SAmlan Barua #endif
1492e81bb599SAmlan Barua 
1493b2b77a04SHong Zhang   } else {
1494b223cc97SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1495b2b77a04SHong Zhang     switch (ndim){
1496b2b77a04SHong Zhang     case 1:
14973c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14983c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1499e5d7f247SAmlan Barua #else
15006882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
15016882372aSHong Zhang       n = (PetscInt)local_n0;
1502798b254bSAmlan Barua       printf("The value of n is %d",n);
15036882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1504e5d7f247SAmlan Barua #endif
15053c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1506b2b77a04SHong Zhang       break;
1507b2b77a04SHong Zhang     case 2:
15085b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1509b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1510b2b77a04SHong Zhang       /*
1511b2b77a04SHong Zhang        PetscMPIInt    rank;
1512b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1513b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1514b2b77a04SHong Zhang        */
1515b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1516b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
15175b3e41ffSAmlan Barua #else
15185b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
15195b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
15205b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
15215b3e41ffSAmlan Barua #endif
1522b2b77a04SHong Zhang       break;
1523b2b77a04SHong Zhang     case 3:
152411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
152551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
152651d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
15276882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
15286882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
152951d1eed7SAmlan Barua #else
153051d1eed7SAmlan Barua       printf("The code comes here\n");
153151d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
153251d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
153351d1eed7SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
153451d1eed7SAmlan Barua #endif
1535b2b77a04SHong Zhang       break;
1536b2b77a04SHong Zhang     default:
1537b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
153811902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1539c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
154011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
15416882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
154211902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
15436882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1544b3a17365SAmlan Barua #else
1545b3a17365SAmlan Barua       temp = pdim[ndim-1];
1546b3a17365SAmlan Barua       pdim[ndim-1]= temp/2 + 1;
1547b3a17365SAmlan Barua       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1548b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1549b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1550b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1551b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1552b3a17365SAmlan Barua       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1553b3a17365SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1554b3a17365SAmlan Barua #endif
1555b2b77a04SHong Zhang       break;
1556b2b77a04SHong Zhang     }
1557b2b77a04SHong Zhang   }
1558b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1559b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1560b2b77a04SHong Zhang   fft->data = (void*)fftw;
1561b2b77a04SHong Zhang 
1562b2b77a04SHong Zhang   fft->n           = n;
1563c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1564e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1565c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1566b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1567c92418ccSAmlan Barua 
1568b2b77a04SHong Zhang   fftw->p_forward  = 0;
1569b2b77a04SHong Zhang   fftw->p_backward = 0;
1570b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1571b2b77a04SHong Zhang 
1572b2b77a04SHong Zhang   if (size == 1){
1573b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1574b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1575b2b77a04SHong Zhang   } else {
1576b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1577b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1578b2b77a04SHong Zhang   }
1579b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
15806a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1581b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
1582b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
15834f7415efSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW);
15843c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
15855b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1586b2b77a04SHong Zhang 
1587b2b77a04SHong Zhang   /* get runtime options */
1588b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1589b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1590b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1591b2b77a04SHong Zhang   PetscOptionsEnd();
1592b2b77a04SHong Zhang   PetscFunctionReturn(0);
1593b2b77a04SHong Zhang }
1594b2b77a04SHong Zhang EXTERN_C_END
15953c3a9c75SAmlan Barua 
15963c3a9c75SAmlan Barua 
15973c3a9c75SAmlan Barua 
15983c3a9c75SAmlan Barua 
1599