xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 4f7415ef3e74e0548d123addcfc344de24dbccc1)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28*4f7415efSAmlan Barua extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*);
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang #undef __FUNCT__
31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33b2b77a04SHong Zhang {
34b2b77a04SHong Zhang   PetscErrorCode ierr;
35b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
38b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39b2b77a04SHong Zhang 
40b2b77a04SHong Zhang   PetscFunctionBegin;
41b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
44b2b77a04SHong Zhang     switch (ndim){
45b2b77a04SHong Zhang     case 1:
4658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
47b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4858a912c5SAmlan Barua #else
4958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5058a912c5SAmlan Barua #endif
51b2b77a04SHong Zhang       break;
52b2b77a04SHong Zhang     case 2:
5358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
54b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5558a912c5SAmlan Barua #else
5658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5758a912c5SAmlan Barua #endif
58b2b77a04SHong Zhang       break;
59b2b77a04SHong Zhang     case 3:
6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
61b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6258a912c5SAmlan Barua #else
6358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6458a912c5SAmlan Barua #endif
65b2b77a04SHong Zhang       break;
66b2b77a04SHong Zhang     default:
6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
68b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6958a912c5SAmlan Barua #else
7058a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7158a912c5SAmlan Barua #endif
72b2b77a04SHong Zhang       break;
73b2b77a04SHong Zhang     }
74b2b77a04SHong Zhang     fftw->finarray  = x_array;
75b2b77a04SHong Zhang     fftw->foutarray = y_array;
76b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
77b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
78b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
79b2b77a04SHong Zhang   } else { /* use existing plan */
80b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
81b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
82b2b77a04SHong Zhang     } else {
83b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
84b2b77a04SHong Zhang     }
85b2b77a04SHong Zhang   }
86b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
88b2b77a04SHong Zhang   PetscFunctionReturn(0);
89b2b77a04SHong Zhang }
90b2b77a04SHong Zhang 
91b2b77a04SHong Zhang #undef __FUNCT__
92b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
93b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
94b2b77a04SHong Zhang {
95b2b77a04SHong Zhang   PetscErrorCode ierr;
96b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
97b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
98b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
99b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
100b2b77a04SHong Zhang 
101b2b77a04SHong Zhang   PetscFunctionBegin;
102b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
104b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
105b2b77a04SHong Zhang     switch (ndim){
106b2b77a04SHong Zhang     case 1:
10758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
108b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10958a912c5SAmlan Barua #else
110e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11158a912c5SAmlan Barua #endif
112b2b77a04SHong Zhang       break;
113b2b77a04SHong Zhang     case 2:
11458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
115b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11658a912c5SAmlan Barua #else
117e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11858a912c5SAmlan Barua #endif
119b2b77a04SHong Zhang       break;
120b2b77a04SHong Zhang     case 3:
12158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
122b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12358a912c5SAmlan Barua #else
124e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12558a912c5SAmlan Barua #endif
126b2b77a04SHong Zhang       break;
127b2b77a04SHong Zhang     default:
12858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
129b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13058a912c5SAmlan Barua #else
131e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13258a912c5SAmlan Barua #endif
133b2b77a04SHong Zhang       break;
134b2b77a04SHong Zhang     }
135b2b77a04SHong Zhang     fftw->binarray  = x_array;
136b2b77a04SHong Zhang     fftw->boutarray = y_array;
137b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
138b2b77a04SHong Zhang   } else { /* use existing plan */
139b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
140b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
141b2b77a04SHong Zhang     } else {
142b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
143b2b77a04SHong Zhang     }
144b2b77a04SHong Zhang   }
145b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
147b2b77a04SHong Zhang   PetscFunctionReturn(0);
148b2b77a04SHong Zhang }
149b2b77a04SHong Zhang 
150b2b77a04SHong Zhang #undef __FUNCT__
151b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
152b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
153b2b77a04SHong Zhang {
154b2b77a04SHong Zhang   PetscErrorCode ierr;
155b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
158c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
159b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
160b2b77a04SHong Zhang 
161b2b77a04SHong Zhang   PetscFunctionBegin;
162b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
165b2b77a04SHong Zhang     switch (ndim){
166b2b77a04SHong Zhang     case 1:
1673c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169ae0a50aaSHong Zhang #else
170ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1713c3a9c75SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     case 2:
1743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1763c3a9c75SAmlan Barua #else
1773c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1783c3a9c75SAmlan Barua #endif
179b2b77a04SHong Zhang       break;
180b2b77a04SHong Zhang     case 3:
1813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
182b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1833c3a9c75SAmlan Barua #else
18451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1853c3a9c75SAmlan Barua #endif
186b2b77a04SHong Zhang       break;
187b2b77a04SHong Zhang     default:
1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1903c3a9c75SAmlan Barua #else
1913c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1923c3a9c75SAmlan Barua #endif
193b2b77a04SHong Zhang       break;
194b2b77a04SHong Zhang     }
195b2b77a04SHong Zhang     fftw->finarray  = x_array;
196b2b77a04SHong Zhang     fftw->foutarray = y_array;
197b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
198b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
199b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
200b2b77a04SHong Zhang   } else { /* use existing plan */
201b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
202b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
203b2b77a04SHong Zhang     } else {
204b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
205b2b77a04SHong Zhang     }
206b2b77a04SHong Zhang   }
207b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
209b2b77a04SHong Zhang   PetscFunctionReturn(0);
210b2b77a04SHong Zhang }
211b2b77a04SHong Zhang 
212b2b77a04SHong Zhang #undef __FUNCT__
213b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
214b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
215b2b77a04SHong Zhang {
216b2b77a04SHong Zhang   PetscErrorCode ierr;
217b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
218b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
219b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
220c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
221b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
222b2b77a04SHong Zhang 
223b2b77a04SHong Zhang   PetscFunctionBegin;
224b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
226b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
227b2b77a04SHong Zhang     switch (ndim){
228b2b77a04SHong Zhang     case 1:
2293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
230b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
231ae0a50aaSHong Zhang #else
232ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2333c3a9c75SAmlan Barua #endif
234b2b77a04SHong Zhang       break;
235b2b77a04SHong Zhang     case 2:
2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
237b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2383c3a9c75SAmlan Barua #else
2393c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2403c3a9c75SAmlan Barua #endif
241b2b77a04SHong Zhang       break;
242b2b77a04SHong Zhang     case 3:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2453c3a9c75SAmlan Barua #else
2463c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     default:
2503c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
251c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2523c3a9c75SAmlan Barua #else
2533c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     }
257b2b77a04SHong Zhang     fftw->binarray  = x_array;
258b2b77a04SHong Zhang     fftw->boutarray = y_array;
259b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
260b2b77a04SHong Zhang   } else { /* use existing plan */
261b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
262b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
263b2b77a04SHong Zhang     } else {
264b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265b2b77a04SHong Zhang     }
266b2b77a04SHong Zhang   }
267b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
269b2b77a04SHong Zhang   PetscFunctionReturn(0);
270b2b77a04SHong Zhang }
271b2b77a04SHong Zhang 
272b2b77a04SHong Zhang #undef __FUNCT__
273b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
274b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
275b2b77a04SHong Zhang {
276b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
277b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
278b2b77a04SHong Zhang   PetscErrorCode ierr;
279b2b77a04SHong Zhang 
280b2b77a04SHong Zhang   PetscFunctionBegin;
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
282b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
283b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
284bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
285b2b77a04SHong Zhang   PetscFunctionReturn(0);
286b2b77a04SHong Zhang }
287b2b77a04SHong Zhang 
288c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
289b2b77a04SHong Zhang #undef __FUNCT__
290b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
291b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
292b2b77a04SHong Zhang {
293b2b77a04SHong Zhang   PetscErrorCode  ierr;
294b2b77a04SHong Zhang   PetscScalar     *array;
295b2b77a04SHong Zhang 
296b2b77a04SHong Zhang   PetscFunctionBegin;
297b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
300b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
301b2b77a04SHong Zhang   PetscFunctionReturn(0);
302b2b77a04SHong Zhang }
303b2b77a04SHong Zhang 
304b2b77a04SHong Zhang #undef __FUNCT__
3053c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
306c92418ccSAmlan Barua /*
307c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
308c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
309c92418ccSAmlan Barua 
310c92418ccSAmlan Barua */
3116a506ed8SAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
312c92418ccSAmlan Barua {
313c92418ccSAmlan Barua   PetscErrorCode ierr;
314c92418ccSAmlan Barua   PetscMPIInt    size,rank;
315c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
316c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
317c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318c92418ccSAmlan Barua   PetscInt       N=fft->N;
319c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
320c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
321c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
322c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
323c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
324ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
325c92418ccSAmlan Barua 
326c92418ccSAmlan Barua   PetscFunctionBegin;
327c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
328c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
329c92418ccSAmlan Barua #endif
330c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
331c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
332c92418ccSAmlan Barua   if (size == 1){
333c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
334c92418ccSAmlan Barua   }
335c92418ccSAmlan Barua   else {
336c92418ccSAmlan Barua       if (ndim>1){
337c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
338c92418ccSAmlan Barua       else {
339c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
340c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
341c92418ccSAmlan Barua           if (fin) {
342c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
343c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
344c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
345c92418ccSAmlan Barua           }
346c92418ccSAmlan Barua           if (fout) {
347c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
348c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
349c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
350c92418ccSAmlan Barua           }
351c92418ccSAmlan Barua           if (bout) {
352c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
353c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
354c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
355c92418ccSAmlan Barua           }
356c92418ccSAmlan Barua   }
357c92418ccSAmlan Barua   if (fin){
358c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
359c92418ccSAmlan Barua   }
360c92418ccSAmlan Barua   if (fout){
361c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
362c92418ccSAmlan Barua   }
363c92418ccSAmlan Barua   if (bout){
364c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
365c92418ccSAmlan Barua   }
366c92418ccSAmlan Barua   PetscFunctionReturn(0);
367c92418ccSAmlan Barua }
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua 
370c92418ccSAmlan Barua }
3713c3a9c75SAmlan Barua 
372*4f7415efSAmlan Barua 
373*4f7415efSAmlan Barua #undef __FUNCT__
374*4f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT"
375*4f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z)
376*4f7415efSAmlan Barua {
377*4f7415efSAmlan Barua   PetscErrorCode ierr;
378*4f7415efSAmlan Barua   PetscFunctionBegin;
379*4f7415efSAmlan Barua   ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
380*4f7415efSAmlan Barua   PetscFunctionReturn(0);
381*4f7415efSAmlan Barua }
382*4f7415efSAmlan Barua 
383*4f7415efSAmlan Barua #undef __FUNCT__
384*4f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT_FFTW"
385*4f7415efSAmlan Barua /*
386*4f7415efSAmlan Barua    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
387*4f7415efSAmlan Barua      parallel layout determined by FFTW
388*4f7415efSAmlan Barua 
389*4f7415efSAmlan Barua    Collective on Mat
390*4f7415efSAmlan Barua 
391*4f7415efSAmlan Barua    Input Parameter:
392*4f7415efSAmlan Barua .  mat - the matrix
393*4f7415efSAmlan Barua 
394*4f7415efSAmlan Barua    Output Parameter:
395*4f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
396*4f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
397*4f7415efSAmlan Barua 
398*4f7415efSAmlan Barua   Level: advanced
399*4f7415efSAmlan Barua 
400*4f7415efSAmlan Barua .seealso: MatCreateFFTW()
401*4f7415efSAmlan Barua */
402*4f7415efSAmlan Barua EXTERN_C_BEGIN
403*4f7415efSAmlan Barua PetscErrorCode  MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
404*4f7415efSAmlan Barua {
405*4f7415efSAmlan Barua   PetscErrorCode ierr;
406*4f7415efSAmlan Barua   PetscMPIInt    size,rank;
407*4f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
408*4f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
409*4f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
410*4f7415efSAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
411*4f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
412*4f7415efSAmlan Barua 
413*4f7415efSAmlan Barua   PetscFunctionBegin;
414*4f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
415*4f7415efSAmlan Barua   PetscValidType(A,1);
416*4f7415efSAmlan Barua 
417*4f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
418*4f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
419*4f7415efSAmlan Barua   printf("size is %d\n",size);
420*4f7415efSAmlan Barua   if (size == 1){ /* sequential case */
421*4f7415efSAmlan Barua   printf("Routine is getting called\n");
422*4f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
423*4f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
424*4f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
425*4f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
426*4f7415efSAmlan Barua #else
427*4f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
428*4f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
429*4f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
430*4f7415efSAmlan Barua #endif
431*4f7415efSAmlan Barua   } else {
432*4f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
433*4f7415efSAmlan Barua     ptrdiff_t      local_n1,local_1_end;
434*4f7415efSAmlan Barua     fftw_complex   *data_fin,*data_fout,data_bout;
435*4f7415efSAmlan Barua     double         *data_finr,*data_boutr ;
436*4f7415efSAmlan Barua     ptrdiff_t      local_1_start,temp;
437*4f7415efSAmlan Barua     switch (ndim){
438*4f7415efSAmlan Barua           case 1:
439*4f7415efSAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
440*4f7415efSAmlan Barua           case 2:
441*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
442*4f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
443*4f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
444*4f7415efSAmlan Barua                  if (fin) {
445*4f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
446*4f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
447*4f7415efSAmlan Barua                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
448*4f7415efSAmlan Barua                            //printf("The code comes here with vector size %d\n",vsize);
449*4f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
450*4f7415efSAmlan Barua                           }
451*4f7415efSAmlan Barua                  if (fout) {
452*4f7415efSAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
453*4f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
454*4f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
455*4f7415efSAmlan Barua                            }
456*4f7415efSAmlan Barua                  if (bout) {
457*4f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
458*4f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
459*4f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
460*4f7415efSAmlan Barua                            //printf("The code comes here with vector size %d\n",vsize);
461*4f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
462*4f7415efSAmlan Barua                           }
463*4f7415efSAmlan Barua 
464*4f7415efSAmlan Barua                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
465*4f7415efSAmlan Barua 
466*4f7415efSAmlan Barua #else
467*4f7415efSAmlan Barua       /* Get local size */
468*4f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
469*4f7415efSAmlan Barua                  if (fin) {
470*4f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
471*4f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
472*4f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
473*4f7415efSAmlan Barua                           }
474*4f7415efSAmlan Barua                  if (fout) {
475*4f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
476*4f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
477*4f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
478*4f7415efSAmlan Barua                            }
479*4f7415efSAmlan Barua                  if (bout) {
480*4f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481*4f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
482*4f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
483*4f7415efSAmlan Barua                           }
484*4f7415efSAmlan Barua 
485*4f7415efSAmlan Barua      //printf("Hope this does not come here");
486*4f7415efSAmlan Barua #endif
487*4f7415efSAmlan Barua       break;
488*4f7415efSAmlan Barua 
489*4f7415efSAmlan Barua           case 3:
490*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
491*4f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
492*4f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
493*4f7415efSAmlan Barua                  if (fin) {
494*4f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
495*4f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
496*4f7415efSAmlan Barua                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
497*4f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
498*4f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
499*4f7415efSAmlan Barua                          }
500*4f7415efSAmlan Barua                  if (fout) {
501*4f7415efSAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
502*4f7415efSAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
503*4f7415efSAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
504*4f7415efSAmlan Barua                           }
505*4f7415efSAmlan Barua                  if (bout) {
506*4f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
507*4f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
508*4f7415efSAmlan Barua                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
509*4f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
510*4f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
511*4f7415efSAmlan Barua                           }
512*4f7415efSAmlan Barua 
513*4f7415efSAmlan Barua       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
514*4f7415efSAmlan Barua 
515*4f7415efSAmlan Barua #else
516*4f7415efSAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
517*4f7415efSAmlan Barua #endif
518*4f7415efSAmlan Barua                  break;
519*4f7415efSAmlan Barua           default:
520*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
521*4f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
522*4f7415efSAmlan Barua                  printf("The value of temp is %ld\n",temp);
523*4f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
524*4f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
525*4f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
526*4f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
527*4f7415efSAmlan Barua 
528*4f7415efSAmlan Barua                  if (fin) {
529*4f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
530*4f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
531*4f7415efSAmlan Barua                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
532*4f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
533*4f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
534*4f7415efSAmlan Barua                         }
535*4f7415efSAmlan Barua                  if (fout) {
536*4f7415efSAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537*4f7415efSAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
538*4f7415efSAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
539*4f7415efSAmlan Barua                         }
540*4f7415efSAmlan Barua                  if (bout) {
541*4f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
542*4f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
543*4f7415efSAmlan Barua                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
544*4f7415efSAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
545*4f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
546*4f7415efSAmlan Barua                         }
547*4f7415efSAmlan Barua 
548*4f7415efSAmlan Barua #else
549*4f7415efSAmlan Barua 
550*4f7415efSAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
551*4f7415efSAmlan Barua #endif
552*4f7415efSAmlan Barua             break;
553*4f7415efSAmlan Barua           }
554*4f7415efSAmlan Barua   }
555*4f7415efSAmlan Barua   PetscFunctionReturn(0);
556*4f7415efSAmlan Barua }
557*4f7415efSAmlan Barua EXTERN_C_END
558*4f7415efSAmlan Barua 
559*4f7415efSAmlan Barua 
560*4f7415efSAmlan Barua 
561c92418ccSAmlan Barua #undef __FUNCT__
562b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
563b2b77a04SHong Zhang /*
564b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
565b2b77a04SHong Zhang      parallel layout determined by FFTW
566b2b77a04SHong Zhang 
567b2b77a04SHong Zhang    Collective on Mat
568b2b77a04SHong Zhang 
569b2b77a04SHong Zhang    Input Parameter:
570b2b77a04SHong Zhang .  mat - the matrix
571b2b77a04SHong Zhang 
572b2b77a04SHong Zhang    Output Parameter:
573b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
574b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
575b2b77a04SHong Zhang 
576b2b77a04SHong Zhang   Level: advanced
577b2b77a04SHong Zhang 
578b2b77a04SHong Zhang .seealso: MatCreateFFTW()
579b2b77a04SHong Zhang */
580b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
581b2b77a04SHong Zhang {
582b2b77a04SHong Zhang   PetscErrorCode ierr;
583b2b77a04SHong Zhang   PetscMPIInt    size,rank;
584b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
585b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
586c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
5873c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
588e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
589b2b77a04SHong Zhang 
590b2b77a04SHong Zhang   PetscFunctionBegin;
591b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
592b2b77a04SHong Zhang   PetscValidType(A,1);
593b2b77a04SHong Zhang 
594b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
595b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
596b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
597e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
598b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
599b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
600e5d7f247SAmlan Barua #else
601e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
602e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
603e81bb599SAmlan Barua #endif
604b2b77a04SHong Zhang   } else {        /* mpi case */
605b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6066882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
607b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
60851d1eed7SAmlan Barua     double         *data_finr ;
609b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
610c92418ccSAmlan Barua //    PetscInt ctr;
611c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
612c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
613c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
61411902ff2SHong Zhang 
615c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
616f76f76beSAmlan Barua //        {k
617c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
618c92418ccSAmlan Barua //       }
619b2b77a04SHong Zhang 
620f76f76beSAmlan Barua 
621f76f76beSAmlan Barua 
622b2b77a04SHong Zhang     switch (ndim){
623b2b77a04SHong Zhang     case 1:
6246882372aSHong Zhang       /* Get local size */
625e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
626c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6276882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6286882372aSHong Zhang       if (fin) {
6296882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6306882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6316882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6326882372aSHong Zhang       }
6336882372aSHong Zhang       if (fout) {
6346882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6356882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6366882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6376882372aSHong Zhang       }
638b2b77a04SHong Zhang       break;
639b2b77a04SHong Zhang     case 2:
6403c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6413c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6423c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6433c3a9c75SAmlan Barua       if (fin) {
6443c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
64554dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6463c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
64709dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6483c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6493c3a9c75SAmlan Barua       }
6503c3a9c75SAmlan Barua       if (fout) {
65109dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
65209dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6533c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6543c3a9c75SAmlan Barua       }
655f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
6563c3a9c75SAmlan Barua 
6573c3a9c75SAmlan Barua #else
658b2b77a04SHong Zhang       /* Get local size */
65964657d84SAmlan Barua      //printf("Hope this does not come here");
660b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
661b2b77a04SHong Zhang       if (fin) {
662b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
663b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
664b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
665b2b77a04SHong Zhang       }
666b2b77a04SHong Zhang       if (fout) {
667b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
668b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
669b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
670b2b77a04SHong Zhang       }
67164657d84SAmlan Barua      //printf("Hope this does not come here");
6723c3a9c75SAmlan Barua #endif
673b2b77a04SHong Zhang       break;
674b2b77a04SHong Zhang     case 3:
6756882372aSHong Zhang       /* Get local size */
6763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
67751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
67851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
67951d1eed7SAmlan Barua       if (fin) {
68051d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
68151d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
68251d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
68351d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
68451d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
68551d1eed7SAmlan Barua       }
68651d1eed7SAmlan Barua       if (fout) {
68751d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
68851d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
68951d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
69051d1eed7SAmlan Barua       }
69151d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
69251d1eed7SAmlan Barua 
69351d1eed7SAmlan Barua 
6943c3a9c75SAmlan Barua #else
6956882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
69611902ff2SHong Zhang //      printf("The quantity n is %d",n);
6976882372aSHong Zhang       if (fin) {
6986882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6996882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7006882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7016882372aSHong Zhang       }
7026882372aSHong Zhang       if (fout) {
7036882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7046882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7056882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7066882372aSHong Zhang       }
7073c3a9c75SAmlan Barua #endif
708b2b77a04SHong Zhang       break;
709b2b77a04SHong Zhang     default:
7106882372aSHong Zhang       /* Get local size */
7113c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
712b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
713b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
714b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
715b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
716b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
717b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
718b3a17365SAmlan Barua       if (fin) {
719b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
720b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
721b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
722b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
723b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
724b3a17365SAmlan Barua       }
725b3a17365SAmlan Barua       if (fout) {
726b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
727b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
728b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
729b3a17365SAmlan Barua       }
730b3a17365SAmlan Barua 
7313c3a9c75SAmlan Barua #else
732c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
73311902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
73411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
73511902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
73611902ff2SHong Zhang //      for(i=0;i<ndim;i++)
73711902ff2SHong Zhang //         {
73811902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
73911902ff2SHong Zhang //         }
74011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
74111902ff2SHong Zhang //      printf("The quantity n is %d",n);
7426882372aSHong Zhang       if (fin) {
7436882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7446882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7456882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7466882372aSHong Zhang       }
7476882372aSHong Zhang       if (fout) {
7486882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7496882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7506882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7516882372aSHong Zhang       }
7523c3a9c75SAmlan Barua #endif
753b2b77a04SHong Zhang       break;
754b2b77a04SHong Zhang     }
755b2b77a04SHong Zhang   }
75654dd5118SAmlan Barua //  if (fin){
75754dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
75854dd5118SAmlan Barua //  }
75954dd5118SAmlan Barua //  if (fout){
76054dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
76154dd5118SAmlan Barua //  }
762b2b77a04SHong Zhang   PetscFunctionReturn(0);
763b2b77a04SHong Zhang }
764b2b77a04SHong Zhang 
765b2b77a04SHong Zhang #undef __FUNCT__
7663c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
7673c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
7683c3a9c75SAmlan Barua {
7693c3a9c75SAmlan Barua   PetscErrorCode ierr;
7703c3a9c75SAmlan Barua   PetscFunctionBegin;
7713c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7723c3a9c75SAmlan Barua   PetscFunctionReturn(0);
7733c3a9c75SAmlan Barua }
77454efbe56SHong Zhang 
775b2b77a04SHong Zhang /*
7763c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
7773c3a9c75SAmlan Barua   Input A, x, y
7783c3a9c75SAmlan Barua   A - FFTW matrix
77954dd5118SAmlan Barua   x - user data
780b2b77a04SHong Zhang   Options Database Keys:
781b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
782b2b77a04SHong Zhang 
783b2b77a04SHong Zhang    Level: intermediate
784b2b77a04SHong Zhang 
785b2b77a04SHong Zhang */
7863c3a9c75SAmlan Barua 
7873c3a9c75SAmlan Barua EXTERN_C_BEGIN
7883c3a9c75SAmlan Barua #undef __FUNCT__
7893c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
7903c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
7913c3a9c75SAmlan Barua {
7923c3a9c75SAmlan Barua   PetscErrorCode ierr;
7933c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
7943c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
7953c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7963c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
797b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
798b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
799e5d7f247SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
8003c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
801e5d7f247SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
8023c3a9c75SAmlan Barua   VecScatter     vecscat;
8033c3a9c75SAmlan Barua   IS             list1,list2;
8043c3a9c75SAmlan Barua 
805f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
806f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8073c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
808f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
8093c3a9c75SAmlan Barua 
810e81bb599SAmlan Barua   if (size==1)
811e81bb599SAmlan Barua     {
8127536937bSAmlan Barua /*     switch (ndim){
813e81bb599SAmlan Barua      case 1:
814e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
815e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
816e81bb599SAmlan Barua              {
817e81bb599SAmlan Barua               indx1[i] = i;
818e81bb599SAmlan Barua              }
819e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
820e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
821e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
824b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
825b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
826e81bb599SAmlan Barua      break;
827e81bb599SAmlan Barua 
828e81bb599SAmlan Barua      case 2:
829e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
830e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
831e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
832e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
833e81bb599SAmlan Barua              }
834e81bb599SAmlan Barua           }
835e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
836e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
837e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
840b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
841b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
842e81bb599SAmlan Barua           break;
843e81bb599SAmlan Barua      case 3:
844e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
845e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
846e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
847e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
848e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
849e81bb599SAmlan Barua                 }
850e81bb599SAmlan Barua              }
851e81bb599SAmlan Barua           }
852e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
853e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
854e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
857b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
858b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
859e81bb599SAmlan Barua           break;
860e81bb599SAmlan Barua      default:
8617536937bSAmlan Barua */
8626971673cSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
8636971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8646971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8656971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8666971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
867b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
8686971673cSAmlan Barua           //ierr = ISDestroy(list1);CHKERRQ(ierr);
8697536937bSAmlan Barua  //         break;
8707536937bSAmlan Barua   //    }
871e81bb599SAmlan Barua     }
872e81bb599SAmlan Barua 
873e81bb599SAmlan Barua  else{
874e81bb599SAmlan Barua 
8753c3a9c75SAmlan Barua  switch (ndim){
8763c3a9c75SAmlan Barua  case 1:
87764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87864657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
87964657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
88064657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
88164657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
88264657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
88364657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
88464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
88764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
88864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
88964657d84SAmlan Barua       break;
89064657d84SAmlan Barua #else
891e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
89264657d84SAmlan Barua #endif
8933c3a9c75SAmlan Barua   break;
8943c3a9c75SAmlan Barua  case 2:
895bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
896bd59e6c6SAmlan Barua       //PetscInt my_value;
897bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
898bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
899bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
900bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
901bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
902bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
903bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
906bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
907bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
908bd59e6c6SAmlan Barua       break;
909bd59e6c6SAmlan Barua #else
9103c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9113c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9123c3a9c75SAmlan Barua 
9135b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9145b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9155b3e41ffSAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
9163c3a9c75SAmlan Barua 
9173c3a9c75SAmlan Barua       if (dim[1]%2==0)
9183c3a9c75SAmlan Barua         NM = dim[1]+2;
9193c3a9c75SAmlan Barua       else
9203c3a9c75SAmlan Barua         NM = dim[1]+1;
9213c3a9c75SAmlan Barua 
9223c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9233c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9243c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9253c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9265b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9273c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9285b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
9295b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
9305b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
9315b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
9323c3a9c75SAmlan Barua         }
9333c3a9c75SAmlan Barua      }
9343c3a9c75SAmlan Barua 
9353c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9363c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9373c3a9c75SAmlan Barua 
938f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
939f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
942b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
943b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
944b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
945b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
9463c3a9c75SAmlan Barua       break;
947bd59e6c6SAmlan Barua #endif
9483c3a9c75SAmlan Barua 
9493c3a9c75SAmlan Barua  case 3:
950bd59e6c6SAmlan Barua       #if defined(PETSC_USE_COMPLEX)
951bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
952bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
953bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
954bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
955bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
956bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
957bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
960bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
961bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
962bd59e6c6SAmlan Barua       break;
963bd59e6c6SAmlan Barua #else
96451d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
96551d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
96651d1eed7SAmlan Barua 
96751d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
96851d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
96951d1eed7SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
97051d1eed7SAmlan Barua 
97151d1eed7SAmlan Barua       if (dim[2]%2==0)
97251d1eed7SAmlan Barua         NM = dim[2]+2;
97351d1eed7SAmlan Barua       else
97451d1eed7SAmlan Barua         NM = dim[2]+1;
97551d1eed7SAmlan Barua 
97651d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
97751d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
97851d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
97951d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
98051d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
98151d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
98251d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
98351d1eed7SAmlan Barua             }
98451d1eed7SAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
98551d1eed7SAmlan Barua            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
98651d1eed7SAmlan Barua            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
98751d1eed7SAmlan Barua            // printf("-------------------------\n",indx2[tempindx],rank);
98851d1eed7SAmlan Barua         }
98951d1eed7SAmlan Barua      }
99051d1eed7SAmlan Barua 
99151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
99251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
99351d1eed7SAmlan Barua 
99451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
99551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
99651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
99751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
998b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
999b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1000b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1001b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
10023c3a9c75SAmlan Barua       break;
1003bd59e6c6SAmlan Barua #endif
10043c3a9c75SAmlan Barua 
10053c3a9c75SAmlan Barua  default:
1006bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1007bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1008bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1009bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1010bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1011bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1012bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1013bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1016bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1017bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1018bd59e6c6SAmlan Barua 
1019bd59e6c6SAmlan Barua 
1020bd59e6c6SAmlan Barua #else
1021e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1022e5d7f247SAmlan Barua       printf("The value of temp is %ld\n",temp);
1023e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1024e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1025e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1026e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1027e5d7f247SAmlan Barua 
1028e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1029e5d7f247SAmlan Barua       printf("The value of partial dim is %d\n",partial_dim);
1030e5d7f247SAmlan Barua 
1031e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1032e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1033e5d7f247SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
1034e5d7f247SAmlan Barua 
1035e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1036ba6e06d0SAmlan Barua         NM = 2;
1037e5d7f247SAmlan Barua       else
1038ba6e06d0SAmlan Barua         NM = 1;
1039e5d7f247SAmlan Barua 
10406971673cSAmlan Barua       j = low;
10416971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
10426971673cSAmlan Barua          {
10436971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
10446971673cSAmlan Barua           indx2[i] = j;
1045ba6e06d0SAmlan Barua           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
10466971673cSAmlan Barua           if (k%dim[ndim-1]==0)
10476971673cSAmlan Barua             { j+=NM;}
10486971673cSAmlan Barua           j++;
10496971673cSAmlan Barua          }
10506971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10516971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10526971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
10536971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10546971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10556971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1059b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
10603c3a9c75SAmlan Barua       break;
1061bd59e6c6SAmlan Barua #endif
10623c3a9c75SAmlan Barua   }
1063e81bb599SAmlan Barua  }
10643c3a9c75SAmlan Barua 
10653c3a9c75SAmlan Barua  return 0;
10663c3a9c75SAmlan Barua }
10673c3a9c75SAmlan Barua EXTERN_C_END
10683c3a9c75SAmlan Barua 
10693c3a9c75SAmlan Barua #undef __FUNCT__
10703c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
10713c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
10723c3a9c75SAmlan Barua {
10733c3a9c75SAmlan Barua   PetscErrorCode ierr;
10743c3a9c75SAmlan Barua   PetscFunctionBegin;
10753c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
10763c3a9c75SAmlan Barua   PetscFunctionReturn(0);
10773c3a9c75SAmlan Barua }
107854efbe56SHong Zhang 
10795b3e41ffSAmlan Barua /*
10805b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
10815b3e41ffSAmlan Barua   Input A, x, y
10825b3e41ffSAmlan Barua   A - FFTW matrix
10835b3e41ffSAmlan Barua   x - FFTW vector
10845b3e41ffSAmlan Barua   y - PETSc vector that user can read
10855b3e41ffSAmlan Barua   Options Database Keys:
10865b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10875b3e41ffSAmlan Barua 
10885b3e41ffSAmlan Barua    Level: intermediate
10895b3e41ffSAmlan Barua 
10903c3a9c75SAmlan Barua */
10913c3a9c75SAmlan Barua 
10923c3a9c75SAmlan Barua EXTERN_C_BEGIN
10933c3a9c75SAmlan Barua #undef __FUNCT__
10945b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
10955b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
10965b3e41ffSAmlan Barua {
10975b3e41ffSAmlan Barua   PetscErrorCode ierr;
10985b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
10995b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
11005b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
11015b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
1102b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1103b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1104ba6e06d0SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
11055b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
1106ba6e06d0SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
11075b3e41ffSAmlan Barua   VecScatter     vecscat;
11085b3e41ffSAmlan Barua   IS             list1,list2;
11095b3e41ffSAmlan Barua 
11105b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
11115b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1112cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
11135b3e41ffSAmlan Barua 
1114e81bb599SAmlan Barua   if (size==1){
11157536937bSAmlan Barua /*
11165b3e41ffSAmlan Barua     switch (ndim){
11175b3e41ffSAmlan Barua     case 1:
1118e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1119e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1120e81bb599SAmlan Barua              {
1121e81bb599SAmlan Barua               indx1[i] = i;
1122e81bb599SAmlan Barua              }
1123e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1124e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1125e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1127e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1128b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1129b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1130e81bb599SAmlan Barua           break;
1131e81bb599SAmlan Barua 
1132e81bb599SAmlan Barua     case 2:
1133e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1134e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1135e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1136e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1137e81bb599SAmlan Barua              }
1138e81bb599SAmlan Barua           }
1139e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1140e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1141e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1144b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1145b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1146e81bb599SAmlan Barua          break;
1147e81bb599SAmlan Barua     case 3:
1148e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1149e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1150e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1151e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1152e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1153e81bb599SAmlan Barua                }
1154e81bb599SAmlan Barua             }
1155e81bb599SAmlan Barua          }
1156e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1157e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1158e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1161b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1162b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1163e81bb599SAmlan Barua          break;
1164e81bb599SAmlan Barua     default:
11657536937bSAmlan Barua */
11666971673cSAmlan Barua          ierr = ISCreateStride(comm,N,0,1,&list1);
11676971673cSAmlan Barua          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
11686971673cSAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
11696971673cSAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11706971673cSAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11716971673cSAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1172b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
11737536937bSAmlan Barua   //       break;
11747536937bSAmlan Barua    // }
1175e81bb599SAmlan Barua   }
1176e81bb599SAmlan Barua   else{
1177e81bb599SAmlan Barua 
1178e81bb599SAmlan Barua  switch (ndim){
1179e81bb599SAmlan Barua  case 1:
118064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
118164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
118264657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
118364657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
118464657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
118564657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
118664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
118764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
118864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
118964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
119064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
119164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
119264657d84SAmlan Barua       break;
119364657d84SAmlan Barua 
119464657d84SAmlan Barua #else
11956a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
119664657d84SAmlan Barua #endif
11975b3e41ffSAmlan Barua   break;
11985b3e41ffSAmlan Barua  case 2:
1199bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1200bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1201bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1202bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1203bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1204bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1207bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1208bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1209bd59e6c6SAmlan Barua       break;
1210bd59e6c6SAmlan Barua #else
12115b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
12125b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
12135b3e41ffSAmlan Barua 
12145b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
12155b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
12165b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
12175b3e41ffSAmlan Barua 
12185b3e41ffSAmlan Barua      if (dim[1]%2==0)
12195b3e41ffSAmlan Barua       NM = dim[1]+2;
12205b3e41ffSAmlan Barua     else
12215b3e41ffSAmlan Barua       NM = dim[1]+1;
12225b3e41ffSAmlan Barua 
12235b3e41ffSAmlan Barua 
12245b3e41ffSAmlan Barua 
12255b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
12265b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
12275b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12285b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12295b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
12305b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
1231cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
1232cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1233cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1234cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
12355b3e41ffSAmlan Barua         }
12365b3e41ffSAmlan Barua      }
12375b3e41ffSAmlan Barua 
12385b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
12395b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
12405b3e41ffSAmlan Barua 
12415b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
12425b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12435b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12445b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1245b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1246b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1247b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1248b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
12495b3e41ffSAmlan Barua   break;
1250bd59e6c6SAmlan Barua #endif
12515b3e41ffSAmlan Barua  case 3:
1252bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1253bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1254bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1255bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1256bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1257bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1258bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1259bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1262bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1263bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1264bd59e6c6SAmlan Barua       break;
1265bd59e6c6SAmlan Barua #else
1266bd59e6c6SAmlan Barua 
126751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
126851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
126951d1eed7SAmlan Barua 
127051d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
127151d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
127251d1eed7SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
127351d1eed7SAmlan Barua 
127451d1eed7SAmlan Barua      if (dim[2]%2==0)
127551d1eed7SAmlan Barua       NM = dim[2]+2;
127651d1eed7SAmlan Barua     else
127751d1eed7SAmlan Barua       NM = dim[2]+1;
127851d1eed7SAmlan Barua 
127951d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
128051d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
128151d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
128251d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
128351d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
128451d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
128551d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
128651d1eed7SAmlan Barua            }
128751d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
128851d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
128951d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
129051d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
129151d1eed7SAmlan Barua         }
129251d1eed7SAmlan Barua      }
129351d1eed7SAmlan Barua 
129451d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
129551d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
129651d1eed7SAmlan Barua 
129751d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
129851d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
129951d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
130051d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1301b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1302b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1303b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1304b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
13055b3e41ffSAmlan Barua   break;
1306bd59e6c6SAmlan Barua #endif
13075b3e41ffSAmlan Barua   default:
1308bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1309bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1310bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1311bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1312bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1313bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1314bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1315bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1316bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1318bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1319bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1320bd59e6c6SAmlan Barua #else
1321ba6e06d0SAmlan Barua      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1322ba6e06d0SAmlan Barua      printf("The value of temp is %ld\n",temp);
1323ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1324ba6e06d0SAmlan Barua      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1325ba6e06d0SAmlan Barua      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1326ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1327ba6e06d0SAmlan Barua 
1328ba6e06d0SAmlan Barua      partial_dim = fftw->partial_dim;
1329ba6e06d0SAmlan Barua      printf("The value of partial dim is %d\n",partial_dim);
1330ba6e06d0SAmlan Barua 
1331ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1332ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1333ba6e06d0SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
1334ba6e06d0SAmlan Barua 
1335ba6e06d0SAmlan Barua      if (dim[ndim-1]%2==0)
1336ba6e06d0SAmlan Barua        NM = 2;
1337ba6e06d0SAmlan Barua      else
1338ba6e06d0SAmlan Barua        NM = 1;
1339ba6e06d0SAmlan Barua 
1340ba6e06d0SAmlan Barua      j = low;
1341ba6e06d0SAmlan Barua      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1342ba6e06d0SAmlan Barua         {
1343ba6e06d0SAmlan Barua          indx1[i] = local_0_start*partial_dim + i;
1344ba6e06d0SAmlan Barua          indx2[i] = j;
1345ba6e06d0SAmlan Barua          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1346ba6e06d0SAmlan Barua          if (k%dim[ndim-1]==0)
1347ba6e06d0SAmlan Barua            { j+=NM;}
1348ba6e06d0SAmlan Barua          j++;
1349ba6e06d0SAmlan Barua         }
1350ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1351ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1352ba6e06d0SAmlan Barua 
1353ba6e06d0SAmlan Barua       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1354ba6e06d0SAmlan Barua 
1355ba6e06d0SAmlan Barua 
1356ba6e06d0SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1357ba6e06d0SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1358ba6e06d0SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1359ba6e06d0SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1360b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1361b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1362b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1363b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1364ba6e06d0SAmlan Barua 
13655b3e41ffSAmlan Barua      break;
1366bd59e6c6SAmlan Barua #endif
13675b3e41ffSAmlan Barua  }
1368e81bb599SAmlan Barua  }
13695b3e41ffSAmlan Barua  return 0;
13705b3e41ffSAmlan Barua }
13715b3e41ffSAmlan Barua EXTERN_C_END
13725b3e41ffSAmlan Barua 
13735b3e41ffSAmlan Barua EXTERN_C_BEGIN
13745b3e41ffSAmlan Barua #undef __FUNCT__
13753c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
13763c3a9c75SAmlan Barua /*
13773c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
13783c3a9c75SAmlan Barua   via the external package FFTW
13793c3a9c75SAmlan Barua   Options Database Keys:
13803c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
13813c3a9c75SAmlan Barua 
13823c3a9c75SAmlan Barua    Level: intermediate
13833c3a9c75SAmlan Barua 
13843c3a9c75SAmlan Barua */
13853c3a9c75SAmlan Barua 
1386b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1387b2b77a04SHong Zhang {
1388b2b77a04SHong Zhang   PetscErrorCode ierr;
1389b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1390b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1391b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1392b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1393b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1394b2b77a04SHong Zhang   PetscBool      flg;
1395b3a17365SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,N1;
139611902ff2SHong Zhang   PetscMPIInt    size,rank;
1397b3a17365SAmlan Barua   ptrdiff_t      *pdim, temp;
13984a16a297SSean Farley   ptrdiff_t      local_n1,local_1_start,local_1_end;
1399b2b77a04SHong Zhang 
1400b2b77a04SHong Zhang   PetscFunctionBegin;
1401b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
140211902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
140354dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1404c92418ccSAmlan Barua 
140511902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
140611902ff2SHong Zhang   pdim[0] = dim[0];
140711902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
140811902ff2SHong Zhang       {
14096882372aSHong Zhang           partial_dim *= dim[ctr];
141011902ff2SHong Zhang           pdim[ctr] = dim[ctr];
14116882372aSHong Zhang       }
14123c3a9c75SAmlan Barua 
1413b2b77a04SHong Zhang   if (size == 1) {
1414e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1415b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1416b2b77a04SHong Zhang     n = N;
1417e81bb599SAmlan Barua #else
1418e81bb599SAmlan Barua     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1419e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1420e81bb599SAmlan Barua     n = tot_dim;
1421e81bb599SAmlan Barua #endif
1422e81bb599SAmlan Barua 
1423b2b77a04SHong Zhang   } else {
1424b223cc97SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1425b2b77a04SHong Zhang     switch (ndim){
1426b2b77a04SHong Zhang     case 1:
14273c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14283c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1429e5d7f247SAmlan Barua #else
14306882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
14316882372aSHong Zhang       n = (PetscInt)local_n0;
14326882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1433e5d7f247SAmlan Barua #endif
14343c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1435b2b77a04SHong Zhang       break;
1436b2b77a04SHong Zhang     case 2:
14375b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1438b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1439b2b77a04SHong Zhang       /*
1440b2b77a04SHong Zhang        PetscMPIInt    rank;
1441b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1442b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1443b2b77a04SHong Zhang        */
1444b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1445b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
14465b3e41ffSAmlan Barua #else
14475b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
14485b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
14495b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
14505b3e41ffSAmlan Barua #endif
1451b2b77a04SHong Zhang       break;
1452b2b77a04SHong Zhang     case 3:
145311902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
145451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
145551d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
14566882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
14576882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
145851d1eed7SAmlan Barua #else
145951d1eed7SAmlan Barua       printf("The code comes here\n");
146051d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
146151d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
146251d1eed7SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
146351d1eed7SAmlan Barua #endif
1464b2b77a04SHong Zhang       break;
1465b2b77a04SHong Zhang     default:
1466b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
146711902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1468c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
146911902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
14706882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
147111902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
14726882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1473b3a17365SAmlan Barua #else
1474b3a17365SAmlan Barua       temp = pdim[ndim-1];
1475b3a17365SAmlan Barua       pdim[ndim-1]= temp/2 + 1;
1476b3a17365SAmlan Barua       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1477b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1478b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1479b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1480b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1481b3a17365SAmlan Barua       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1482b3a17365SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1483b3a17365SAmlan Barua #endif
1484b2b77a04SHong Zhang       break;
1485b2b77a04SHong Zhang     }
1486b2b77a04SHong Zhang   }
1487b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1488b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1489b2b77a04SHong Zhang   fft->data = (void*)fftw;
1490b2b77a04SHong Zhang 
1491b2b77a04SHong Zhang   fft->n           = n;
1492c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1493e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1494c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1495b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1496c92418ccSAmlan Barua 
1497b2b77a04SHong Zhang   fftw->p_forward  = 0;
1498b2b77a04SHong Zhang   fftw->p_backward = 0;
1499b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1500b2b77a04SHong Zhang 
1501b2b77a04SHong Zhang   if (size == 1){
1502b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1503b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1504b2b77a04SHong Zhang   } else {
1505b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1506b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1507b2b77a04SHong Zhang   }
1508b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
15096a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1510b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
1511b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
1512*4f7415efSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW);
15133c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
15145b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1515b2b77a04SHong Zhang 
1516b2b77a04SHong Zhang   /* get runtime options */
1517b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1518b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1519b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1520b2b77a04SHong Zhang   PetscOptionsEnd();
1521b2b77a04SHong Zhang   PetscFunctionReturn(0);
1522b2b77a04SHong Zhang }
1523b2b77a04SHong Zhang EXTERN_C_END
15243c3a9c75SAmlan Barua 
15253c3a9c75SAmlan Barua 
15263c3a9c75SAmlan Barua 
15273c3a9c75SAmlan Barua 
1528