xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 9496c9ae50fa19016b6b018f20f729c3b1a8df3c)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC"
305c92418ccSAmlan Barua /*
3064be45526SHong Zhang     - Get Vectors(s) compatible with matrix, i.e. with the
307c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
308c92418ccSAmlan Barua 
309c92418ccSAmlan Barua */
3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311c92418ccSAmlan Barua {
312c92418ccSAmlan Barua   PetscErrorCode ierr;
313c92418ccSAmlan Barua   PetscMPIInt    size,rank;
314c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
315c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
316c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317c92418ccSAmlan Barua   PetscInt       N=fft->N;
318c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
321c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
323ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
324c92418ccSAmlan Barua 
325c92418ccSAmlan Barua   PetscFunctionBegin;
326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
327c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328c92418ccSAmlan Barua #endif
329c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331c92418ccSAmlan Barua   if (size == 1){
332c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333c92418ccSAmlan Barua   }
334c92418ccSAmlan Barua   else {
335c92418ccSAmlan Barua       if (ndim>1){
336c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337c92418ccSAmlan Barua       else {
338c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340c92418ccSAmlan Barua           if (fin) {
341c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344c92418ccSAmlan Barua           }
345c92418ccSAmlan Barua           if (fout) {
346c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349c92418ccSAmlan Barua           }
350c92418ccSAmlan Barua           if (bout) {
351c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354c92418ccSAmlan Barua           }
355c92418ccSAmlan Barua   }
356c92418ccSAmlan Barua   if (fin){
357c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358c92418ccSAmlan Barua   }
359c92418ccSAmlan Barua   if (fout){
360c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361c92418ccSAmlan Barua   }
362c92418ccSAmlan Barua   if (bout){
363c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364c92418ccSAmlan Barua   }
365c92418ccSAmlan Barua   PetscFunctionReturn(0);
366c92418ccSAmlan Barua }
367c92418ccSAmlan Barua 
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua }
3703c3a9c75SAmlan Barua 
3714f7415efSAmlan Barua #undef __FUNCT__
3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3734be45526SHong Zhang /*@
374b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3754f7415efSAmlan Barua      parallel layout determined by FFTW
3764f7415efSAmlan Barua 
3774f7415efSAmlan Barua    Collective on Mat
3784f7415efSAmlan Barua 
3794f7415efSAmlan Barua    Input Parameter:
3804f7415efSAmlan Barua .  mat - the matrix
3814f7415efSAmlan Barua 
3824f7415efSAmlan Barua    Output Parameter:
3834f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3844f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3854f7415efSAmlan Barua 
3864f7415efSAmlan Barua   Level: advanced
3874f7415efSAmlan Barua 
3884f7415efSAmlan Barua .seealso: MatCreateFFTW()
3894be45526SHong Zhang @*/
3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3914be45526SHong Zhang {
3924be45526SHong Zhang   PetscErrorCode ierr;
3934be45526SHong Zhang   PetscFunctionBegin;
3944be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3954be45526SHong Zhang   PetscFunctionReturn(0);
3964be45526SHong Zhang }
3974be45526SHong Zhang 
3984f7415efSAmlan Barua EXTERN_C_BEGIN
3994be45526SHong Zhang #undef __FUNCT__
4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4014be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4024f7415efSAmlan Barua {
4034f7415efSAmlan Barua   PetscErrorCode ierr;
4044f7415efSAmlan Barua   PetscMPIInt    size,rank;
4054f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
4064f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
4074f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
408*9496c9aeSAmlan Barua   PetscInt       N=fft->N;
4094f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
4104f7415efSAmlan Barua 
4114f7415efSAmlan Barua   PetscFunctionBegin;
4124f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4134f7415efSAmlan Barua   PetscValidType(A,1);
4144f7415efSAmlan Barua 
4154f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
4164f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4174f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4184f7415efSAmlan Barua   printf("Routine is getting called\n");
4194f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4204f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4214f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4224f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4234f7415efSAmlan Barua #else
424*9496c9aeSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
4254f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
4264f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
4274f7415efSAmlan Barua #endif
4284f7415efSAmlan Barua   } else {
4294f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
430*9496c9aeSAmlan Barua     ptrdiff_t      local_n1;
431*9496c9aeSAmlan Barua     fftw_complex   *data_fout;
432*9496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
433*9496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
434*9496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
435*9496c9aeSAmlan Barua #else
4364f7415efSAmlan Barua     double         *data_finr,*data_boutr;
437*9496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
438*9496c9aeSAmlan Barua     ptrdiff_t      temp;
439*9496c9aeSAmlan Barua #endif
440*9496c9aeSAmlan Barua 
4414f7415efSAmlan Barua     switch (ndim){
4424f7415efSAmlan Barua           case 1:
44357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
44457625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
44557625b84SAmlan Barua #else
446*9496c9aeSAmlan Barua                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
44757625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44857625b84SAmlan Barua                  if (fin) {
44957625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45057625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
45157625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
45257625b84SAmlan Barua                          }
45357625b84SAmlan Barua                  if (fout) {
45457625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45557625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
45657625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
45757625b84SAmlan Barua                          }
45857625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
45957625b84SAmlan Barua                  if (bout) {
46057625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
46157625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
46257625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
46357625b84SAmlan Barua                          }
46457625b84SAmlan Barua           break;
46557625b84SAmlan Barua #endif
4664f7415efSAmlan Barua           case 2:
4674f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4684f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4694f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4704f7415efSAmlan Barua                  if (fin) {
4714f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4724f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4734f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4744f7415efSAmlan Barua                           }
4754f7415efSAmlan Barua                  if (bout) {
4764f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4774f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4784f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4794f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua                           }
481*9496c9aeSAmlan Barua                  n1 = 2*local_n1*dim[0];
48257625b84SAmlan Barua                  if (fout) {
48357625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
484*9496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48557625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
48657625b84SAmlan Barua                            }
4874f7415efSAmlan Barua #else
4884f7415efSAmlan Barua       /* Get local size */
4894f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4904f7415efSAmlan Barua                  if (fin) {
4914f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4924f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4934f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4944f7415efSAmlan Barua                           }
4954f7415efSAmlan Barua                  if (fout) {
4964f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4974f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4984f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4994f7415efSAmlan Barua                            }
5004f7415efSAmlan Barua                  if (bout) {
5014f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5024f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5034f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5044f7415efSAmlan Barua                           }
5054f7415efSAmlan Barua #endif
5064f7415efSAmlan Barua           break;
5074f7415efSAmlan Barua           case 3:
5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5094f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5104f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5114f7415efSAmlan Barua                  if (fin) {
5124f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5134f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5144f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5154f7415efSAmlan Barua                          }
5164f7415efSAmlan Barua                  if (bout) {
5174f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5184f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5194f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5204f7415efSAmlan Barua                           }
521*9496c9aeSAmlan Barua                  n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
52257625b84SAmlan Barua                  if (fout) {
52357625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52457625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52557625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52657625b84SAmlan Barua                           }
5274f7415efSAmlan Barua #else
5280c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5290c9b18e4SAmlan Barua                  if (fin) {
5300c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5310c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5320c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5330c9b18e4SAmlan Barua                          }
5340c9b18e4SAmlan Barua                  if (fout) {
5350c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5360c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5370c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5380c9b18e4SAmlan Barua                          }
5390c9b18e4SAmlan Barua                  if (bout) {
5400c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5410c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5420c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5430c9b18e4SAmlan Barua                          }
5444f7415efSAmlan Barua #endif
5454f7415efSAmlan Barua           break;
5464f7415efSAmlan Barua           default:
5474f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5484f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5494f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5504f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5514f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5524f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5534f7415efSAmlan Barua 
5544f7415efSAmlan Barua                  if (fin) {
5554f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5564f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5574f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5584f7415efSAmlan Barua                         }
5594f7415efSAmlan Barua                  if (bout) {
5604f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5614f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
562*9496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5634f7415efSAmlan Barua                         }
564*9496c9aeSAmlan Barua                  temp = fftw->partial_dim;
565*9496c9aeSAmlan Barua                  fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
566*9496c9aeSAmlan Barua                  n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
56757625b84SAmlan Barua                  if (fout) {
56857625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
56957625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57057625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57157625b84SAmlan Barua                         }
5724f7415efSAmlan Barua #else
5730c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5740c9b18e4SAmlan Barua                 if (fin) {
5750c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5760c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5770c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5780c9b18e4SAmlan Barua                        }
5790c9b18e4SAmlan Barua                 if (fout) {
5800c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5810c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5820c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5830c9b18e4SAmlan Barua                        }
5840c9b18e4SAmlan Barua                 if (bout) {
5850c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5860c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5870c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5880c9b18e4SAmlan Barua                        }
5894f7415efSAmlan Barua #endif
5904f7415efSAmlan Barua             break;
5914f7415efSAmlan Barua           }
5924f7415efSAmlan Barua   }
5934f7415efSAmlan Barua   PetscFunctionReturn(0);
5944f7415efSAmlan Barua }
5954f7415efSAmlan Barua EXTERN_C_END
5964f7415efSAmlan Barua 
597c92418ccSAmlan Barua #undef __FUNCT__
598b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
599b2b77a04SHong Zhang /*
600b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
601b2b77a04SHong Zhang      parallel layout determined by FFTW
602b2b77a04SHong Zhang 
603b2b77a04SHong Zhang    Collective on Mat
604b2b77a04SHong Zhang 
605b2b77a04SHong Zhang    Input Parameter:
606b2b77a04SHong Zhang .  mat - the matrix
607b2b77a04SHong Zhang 
608b2b77a04SHong Zhang    Output Parameter:
609b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
610b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
611b2b77a04SHong Zhang 
612b2b77a04SHong Zhang   Level: advanced
613b2b77a04SHong Zhang 
614b2b77a04SHong Zhang .seealso: MatCreateFFTW()
615b2b77a04SHong Zhang */
616b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
617b2b77a04SHong Zhang {
618b2b77a04SHong Zhang   PetscErrorCode ierr;
619b2b77a04SHong Zhang   PetscMPIInt    size,rank;
620b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
621b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
622c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
623*9496c9aeSAmlan Barua   PetscInt       N=fft->N;
624e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
625b2b77a04SHong Zhang 
626b2b77a04SHong Zhang   PetscFunctionBegin;
627b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
628b2b77a04SHong Zhang   PetscValidType(A,1);
629b2b77a04SHong Zhang 
630b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
631b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
632b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
633e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
634b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
635b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
636e5d7f247SAmlan Barua #else
637e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
638e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
639e81bb599SAmlan Barua #endif
640b2b77a04SHong Zhang   } else {        /* mpi case */
641b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6426882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
643b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
644*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
64551d1eed7SAmlan Barua     double         *data_finr ;
646b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
647*9496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
648*9496c9aeSAmlan Barua #endif
649*9496c9aeSAmlan Barua 
650c92418ccSAmlan Barua //    PetscInt ctr;
651c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
652c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
653c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
65411902ff2SHong Zhang 
655c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
656f76f76beSAmlan Barua //        {k
657c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
658c92418ccSAmlan Barua //       }
659b2b77a04SHong Zhang 
660f76f76beSAmlan Barua 
661f76f76beSAmlan Barua 
662b2b77a04SHong Zhang     switch (ndim){
663b2b77a04SHong Zhang     case 1:
6646882372aSHong Zhang       /* Get local size */
665e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
666c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6676882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
668*9496c9aeSAmlan Barua       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
6696882372aSHong Zhang       if (fin) {
6706882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6716882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6726882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6736882372aSHong Zhang       }
6746882372aSHong Zhang       if (fout) {
6756882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
67657625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6776882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6786882372aSHong Zhang       }
679b2b77a04SHong Zhang       break;
680b2b77a04SHong Zhang     case 2:
6813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6823c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6833c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6843c3a9c75SAmlan Barua       if (fin) {
6853c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
68654dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6873c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
68809dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6893c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6903c3a9c75SAmlan Barua       }
69157625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
69257625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
69357625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
6943c3a9c75SAmlan Barua       if (fout) {
69509dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
69609dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6973c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6983c3a9c75SAmlan Barua       }
699f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
7003c3a9c75SAmlan Barua 
7013c3a9c75SAmlan Barua #else
702b2b77a04SHong Zhang       /* Get local size */
70364657d84SAmlan Barua      //printf("Hope this does not come here");
704b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
705b2b77a04SHong Zhang       if (fin) {
706b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
707b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
708b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
709b2b77a04SHong Zhang       }
710b2b77a04SHong Zhang       if (fout) {
711b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
712b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
713b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
714b2b77a04SHong Zhang       }
71564657d84SAmlan Barua      //printf("Hope this does not come here");
7163c3a9c75SAmlan Barua #endif
717b2b77a04SHong Zhang       break;
718b2b77a04SHong Zhang     case 3:
7196882372aSHong Zhang       /* Get local size */
7203c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
72151d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
72251d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
72351d1eed7SAmlan Barua       if (fin) {
72451d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
72551d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
72651d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
72751d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
72851d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
72951d1eed7SAmlan Barua       }
73057625b84SAmlan Barua       printf("The value is %ld",local_n1);
73157625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
73251d1eed7SAmlan Barua       if (fout) {
73351d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
73451d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
73551d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
73651d1eed7SAmlan Barua       }
73751d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
73851d1eed7SAmlan Barua 
73951d1eed7SAmlan Barua 
7403c3a9c75SAmlan Barua #else
7416882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
74211902ff2SHong Zhang //      printf("The quantity n is %d",n);
7436882372aSHong Zhang       if (fin) {
7446882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7456882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7466882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7476882372aSHong Zhang       }
7486882372aSHong Zhang       if (fout) {
7496882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7506882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7516882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7526882372aSHong Zhang       }
7533c3a9c75SAmlan Barua #endif
754b2b77a04SHong Zhang       break;
755b2b77a04SHong Zhang     default:
7566882372aSHong Zhang       /* Get local size */
7573c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
758b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
759b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
760b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
761b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
762b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
763b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
764b3a17365SAmlan Barua       if (fin) {
765b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
766b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
767b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
768b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
769b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
770b3a17365SAmlan Barua       }
77157625b84SAmlan Barua       printf("The value is %ld. Global length is %d \n",local_n1,N1);
77257625b84SAmlan Barua       temp = fftw->partial_dim;
77357625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
77457625b84SAmlan Barua 
77557625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
776b3a17365SAmlan Barua       if (fout) {
777b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
77857625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
779b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
780b3a17365SAmlan Barua       }
781b3a17365SAmlan Barua 
7823c3a9c75SAmlan Barua #else
783c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
78411902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
78511902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
78611902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
78711902ff2SHong Zhang //      for(i=0;i<ndim;i++)
78811902ff2SHong Zhang //         {
78911902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
79011902ff2SHong Zhang //         }
79111902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
79211902ff2SHong Zhang //      printf("The quantity n is %d",n);
7936882372aSHong Zhang       if (fin) {
7946882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7956882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7966882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7976882372aSHong Zhang       }
7986882372aSHong Zhang       if (fout) {
7996882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
8006882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
8016882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
8026882372aSHong Zhang       }
8033c3a9c75SAmlan Barua #endif
804b2b77a04SHong Zhang       break;
805b2b77a04SHong Zhang     }
806b2b77a04SHong Zhang   }
80754dd5118SAmlan Barua //  if (fin){
80854dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
80954dd5118SAmlan Barua //  }
81054dd5118SAmlan Barua //  if (fout){
81154dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
81254dd5118SAmlan Barua //  }
813b2b77a04SHong Zhang   PetscFunctionReturn(0);
814b2b77a04SHong Zhang }
815b2b77a04SHong Zhang 
816b2b77a04SHong Zhang #undef __FUNCT__
8173c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
8183c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
8193c3a9c75SAmlan Barua {
8203c3a9c75SAmlan Barua   PetscErrorCode ierr;
8213c3a9c75SAmlan Barua   PetscFunctionBegin;
8223c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8233c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8243c3a9c75SAmlan Barua }
82554efbe56SHong Zhang 
826b2b77a04SHong Zhang /*
827*9496c9aeSAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
828*9496c9aeSAmlan Barua       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
829*9496c9aeSAmlan Barua       changing dimension.
8303c3a9c75SAmlan Barua   Input A, x, y
8313c3a9c75SAmlan Barua   A - FFTW matrix
83254dd5118SAmlan Barua   x - user data
833b2b77a04SHong Zhang   Options Database Keys:
834b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
835b2b77a04SHong Zhang 
836b2b77a04SHong Zhang    Level: intermediate
837b2b77a04SHong Zhang 
838b2b77a04SHong Zhang */
8393c3a9c75SAmlan Barua 
8403c3a9c75SAmlan Barua EXTERN_C_BEGIN
8413c3a9c75SAmlan Barua #undef __FUNCT__
8423c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
8433c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
8443c3a9c75SAmlan Barua {
8453c3a9c75SAmlan Barua   PetscErrorCode ierr;
8463c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8473c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8483c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
849*9496c9aeSAmlan Barua   PetscInt       N=fft->N;
850b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
851*9496c9aeSAmlan Barua   PetscInt       low;
852*9496c9aeSAmlan Barua   PetscInt       rank,size;
8533c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
854*9496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8553c3a9c75SAmlan Barua   VecScatter     vecscat;
8563c3a9c75SAmlan Barua   IS             list1,list2;
857*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
858*9496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
859*9496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
860*9496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
861*9496c9aeSAmlan Barua   ptrdiff_t      temp;
862*9496c9aeSAmlan Barua #endif
8633c3a9c75SAmlan Barua 
864f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
865f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8663c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
8673c3a9c75SAmlan Barua 
868e81bb599SAmlan Barua   if (size==1)
869e81bb599SAmlan Barua     {
8707536937bSAmlan Barua /*     switch (ndim){
871e81bb599SAmlan Barua      case 1:
872e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
873e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
874e81bb599SAmlan Barua              {
875e81bb599SAmlan Barua               indx1[i] = i;
876e81bb599SAmlan Barua              }
877e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
878e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
879e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
882b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
883b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
884e81bb599SAmlan Barua      break;
885e81bb599SAmlan Barua 
886e81bb599SAmlan Barua      case 2:
887e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
888e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
889e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
890e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
891e81bb599SAmlan Barua              }
892e81bb599SAmlan Barua           }
893e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
894e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
895e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
898b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
899b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
900e81bb599SAmlan Barua           break;
901e81bb599SAmlan Barua      case 3:
902e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
903e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
904e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
905e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
906e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
907e81bb599SAmlan Barua                 }
908e81bb599SAmlan Barua              }
909e81bb599SAmlan Barua           }
910e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
911e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
912e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
915b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
916b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
917e81bb599SAmlan Barua           break;
918e81bb599SAmlan Barua      default:
9197536937bSAmlan Barua */
920*9496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
9216971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9226971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9236971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9246971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
925b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
9267536937bSAmlan Barua  //         break;
9277536937bSAmlan Barua   //    }
928e81bb599SAmlan Barua     }
929e81bb599SAmlan Barua 
930e81bb599SAmlan Barua  else{
931e81bb599SAmlan Barua 
9323c3a9c75SAmlan Barua  switch (ndim){
9333c3a9c75SAmlan Barua  case 1:
93464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
93564657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
93664657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
93764657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
93864657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
93964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
94264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
94364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
94464657d84SAmlan Barua #else
945e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
94664657d84SAmlan Barua #endif
9473c3a9c75SAmlan Barua  break;
9483c3a9c75SAmlan Barua  case 2:
949bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
950bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
951bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
952bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
953bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
954bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959bd59e6c6SAmlan Barua #else
9603c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9613c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
962*9496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
963*9496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
964*9496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
965*9496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9663c3a9c75SAmlan Barua 
9673c3a9c75SAmlan Barua       if (dim[1]%2==0)
9683c3a9c75SAmlan Barua         NM = dim[1]+2;
9693c3a9c75SAmlan Barua       else
9703c3a9c75SAmlan Barua         NM = dim[1]+1;
9713c3a9c75SAmlan Barua 
9723c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9733c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9743c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9753c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9765b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9773c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9783c3a9c75SAmlan Barua         }
9793c3a9c75SAmlan Barua      }
9803c3a9c75SAmlan Barua 
9813c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9823c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9833c3a9c75SAmlan Barua 
984f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
985f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
988b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
989b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
990b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
991b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua #endif
993*9496c9aeSAmlan Barua  break;
9943c3a9c75SAmlan Barua 
9953c3a9c75SAmlan Barua  case 3:
996bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
997bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
998bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
999bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1000bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1001bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1002bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1003bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1006bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1007bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1008bd59e6c6SAmlan Barua #else
100951d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
101051d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
101151d1eed7SAmlan Barua 
1012*9496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1013*9496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1014*9496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1015*9496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
101651d1eed7SAmlan Barua 
101751d1eed7SAmlan Barua       if (dim[2]%2==0)
101851d1eed7SAmlan Barua         NM = dim[2]+2;
101951d1eed7SAmlan Barua       else
102051d1eed7SAmlan Barua         NM = dim[2]+1;
102151d1eed7SAmlan Barua 
102251d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
102351d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
102451d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
102551d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
102651d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
102751d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
102851d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
102951d1eed7SAmlan Barua             }
103051d1eed7SAmlan Barua         }
103151d1eed7SAmlan Barua      }
103251d1eed7SAmlan Barua 
103351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
103451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
103551d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
103651d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103751d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103851d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1039b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1040b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1041b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1042b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1043bd59e6c6SAmlan Barua #endif
1044*9496c9aeSAmlan Barua  break;
10453c3a9c75SAmlan Barua 
10463c3a9c75SAmlan Barua  default:
1047bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1048bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1049bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1050bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1051bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1052bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057bd59e6c6SAmlan Barua #else
1058e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1059e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1060e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1061e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1062e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1063e5d7f247SAmlan Barua 
1064e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1065e5d7f247SAmlan Barua 
1066e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1067e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1068e5d7f247SAmlan Barua 
1069e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1070ba6e06d0SAmlan Barua         NM = 2;
1071e5d7f247SAmlan Barua       else
1072ba6e06d0SAmlan Barua         NM = 1;
1073e5d7f247SAmlan Barua 
10746971673cSAmlan Barua       j = low;
10756971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
10766971673cSAmlan Barua          {
10776971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
10786971673cSAmlan Barua           indx2[i] = j;
10796971673cSAmlan Barua           if (k%dim[ndim-1]==0)
10806971673cSAmlan Barua             { j+=NM;}
10816971673cSAmlan Barua           j++;
10826971673cSAmlan Barua          }
10836971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10846971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10856971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
10866971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10876971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10886971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1089b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1090b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1091b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1092b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1093bd59e6c6SAmlan Barua #endif
1094*9496c9aeSAmlan Barua  break;
10953c3a9c75SAmlan Barua   }
1096e81bb599SAmlan Barua  }
10973c3a9c75SAmlan Barua 
10983c3a9c75SAmlan Barua  return 0;
10993c3a9c75SAmlan Barua }
11003c3a9c75SAmlan Barua EXTERN_C_END
11013c3a9c75SAmlan Barua 
11023c3a9c75SAmlan Barua #undef __FUNCT__
11033c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
11043c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
11053c3a9c75SAmlan Barua {
11063c3a9c75SAmlan Barua   PetscErrorCode ierr;
11073c3a9c75SAmlan Barua   PetscFunctionBegin;
11083c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
11093c3a9c75SAmlan Barua   PetscFunctionReturn(0);
11103c3a9c75SAmlan Barua }
111154efbe56SHong Zhang 
11125b3e41ffSAmlan Barua /*
11135b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
11145b3e41ffSAmlan Barua   Input A, x, y
11155b3e41ffSAmlan Barua   A - FFTW matrix
11165b3e41ffSAmlan Barua   x - FFTW vector
11175b3e41ffSAmlan Barua   y - PETSc vector that user can read
11185b3e41ffSAmlan Barua   Options Database Keys:
11195b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11205b3e41ffSAmlan Barua 
11215b3e41ffSAmlan Barua    Level: intermediate
11225b3e41ffSAmlan Barua 
11233c3a9c75SAmlan Barua */
11243c3a9c75SAmlan Barua 
11253c3a9c75SAmlan Barua EXTERN_C_BEGIN
11263c3a9c75SAmlan Barua #undef __FUNCT__
11275b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
11285b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
11295b3e41ffSAmlan Barua {
11305b3e41ffSAmlan Barua   PetscErrorCode ierr;
11315b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
11325b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
11335b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1134*9496c9aeSAmlan Barua   PetscInt       N=fft->N;
1135b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1136*9496c9aeSAmlan Barua   PetscInt       low;
1137*9496c9aeSAmlan Barua   PetscInt       rank,size;
11385b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
1139*9496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11405b3e41ffSAmlan Barua   VecScatter     vecscat;
11415b3e41ffSAmlan Barua   IS             list1,list2;
1142*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1143*9496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
1144*9496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
1145*9496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
1146*9496c9aeSAmlan Barua   ptrdiff_t      temp;
1147*9496c9aeSAmlan Barua #endif
1148*9496c9aeSAmlan Barua 
11495b3e41ffSAmlan Barua 
11505b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
11515b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1152cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
11535b3e41ffSAmlan Barua 
1154e81bb599SAmlan Barua   if (size==1){
11557536937bSAmlan Barua /*
11565b3e41ffSAmlan Barua     switch (ndim){
11575b3e41ffSAmlan Barua     case 1:
1158e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1159e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1160e81bb599SAmlan Barua              {
1161e81bb599SAmlan Barua               indx1[i] = i;
1162e81bb599SAmlan Barua              }
1163e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1164e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1165e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1168b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1169b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1170e81bb599SAmlan Barua           break;
1171e81bb599SAmlan Barua 
1172e81bb599SAmlan Barua     case 2:
1173e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1174e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1175e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1176e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1177e81bb599SAmlan Barua              }
1178e81bb599SAmlan Barua           }
1179e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1180e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1181e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1183e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1184b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1185b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1186e81bb599SAmlan Barua          break;
1187e81bb599SAmlan Barua     case 3:
1188e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1189e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1190e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1191e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1192e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1193e81bb599SAmlan Barua                }
1194e81bb599SAmlan Barua             }
1195e81bb599SAmlan Barua          }
1196e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1197e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1198e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1201b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1202b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1203e81bb599SAmlan Barua          break;
1204e81bb599SAmlan Barua     default:
12057536937bSAmlan Barua */
12066971673cSAmlan Barua          ierr = ISCreateStride(comm,N,0,1,&list1);
12076971673cSAmlan Barua          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
12086971673cSAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
12096971673cSAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12106971673cSAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12116971673cSAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1212b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
12137536937bSAmlan Barua   //       break;
12147536937bSAmlan Barua    // }
1215e81bb599SAmlan Barua   }
1216e81bb599SAmlan Barua   else{
1217e81bb599SAmlan Barua 
1218e81bb599SAmlan Barua  switch (ndim){
1219e81bb599SAmlan Barua  case 1:
122064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
122164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1222*9496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
1223*9496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
122464657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
122564657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
122664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
122764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
123064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
123164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
123264657d84SAmlan Barua #else
12336a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
123464657d84SAmlan Barua #endif
12355b3e41ffSAmlan Barua  break;
12365b3e41ffSAmlan Barua  case 2:
1237bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1238bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1239bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1240bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1241bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1242bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1245bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1246bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1247bd59e6c6SAmlan Barua #else
12485b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
12495b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
12505b3e41ffSAmlan Barua 
1251*9496c9aeSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1252*9496c9aeSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
12535b3e41ffSAmlan Barua 
12545b3e41ffSAmlan Barua      if (dim[1]%2==0)
12555b3e41ffSAmlan Barua       NM = dim[1]+2;
12565b3e41ffSAmlan Barua     else
12575b3e41ffSAmlan Barua       NM = dim[1]+1;
12585b3e41ffSAmlan Barua 
12595b3e41ffSAmlan Barua 
12605b3e41ffSAmlan Barua 
12615b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
12625b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
12635b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12645b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12655b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
12665b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
1267cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
1268cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1269cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1270cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
12715b3e41ffSAmlan Barua         }
12725b3e41ffSAmlan Barua      }
12735b3e41ffSAmlan Barua 
12745b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
12755b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
12765b3e41ffSAmlan Barua 
12775b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
12785b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12795b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12805b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1281b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1282b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1283b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1284b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1285bd59e6c6SAmlan Barua #endif
1286*9496c9aeSAmlan Barua  break;
12875b3e41ffSAmlan Barua  case 3:
1288bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1289bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1290bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1291bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1292bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1293bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1294bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1295bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1298bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1299bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1300bd59e6c6SAmlan Barua #else
1301bd59e6c6SAmlan Barua 
130251d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
130351d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
130451d1eed7SAmlan Barua 
1305*9496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1306*9496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1307*9496c9aeSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1308*9496c9aeSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
130951d1eed7SAmlan Barua 
131051d1eed7SAmlan Barua      if (dim[2]%2==0)
131151d1eed7SAmlan Barua       NM = dim[2]+2;
131251d1eed7SAmlan Barua     else
131351d1eed7SAmlan Barua       NM = dim[2]+1;
131451d1eed7SAmlan Barua 
131551d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
131651d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
131751d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
131851d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
131951d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
132051d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
132151d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
132251d1eed7SAmlan Barua            }
132351d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
132451d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
132551d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
132651d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
132751d1eed7SAmlan Barua         }
132851d1eed7SAmlan Barua      }
132951d1eed7SAmlan Barua 
133051d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
133151d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
133251d1eed7SAmlan Barua 
133351d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
133451d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
133551d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
133651d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1337b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1338b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1339b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1340b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1341bd59e6c6SAmlan Barua #endif
1342*9496c9aeSAmlan Barua  break;
13435b3e41ffSAmlan Barua  default:
1344bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1345bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1346bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1347bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1348bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1349bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1350bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1351bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1353bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1354bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1355bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1356bd59e6c6SAmlan Barua #else
1357ba6e06d0SAmlan Barua      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1358ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1359ba6e06d0SAmlan Barua      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1360ba6e06d0SAmlan Barua      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1361ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1362ba6e06d0SAmlan Barua 
1363ba6e06d0SAmlan Barua      partial_dim = fftw->partial_dim;
1364ba6e06d0SAmlan Barua 
1365ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1366ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1367ba6e06d0SAmlan Barua 
1368ba6e06d0SAmlan Barua      if (dim[ndim-1]%2==0)
1369ba6e06d0SAmlan Barua        NM = 2;
1370ba6e06d0SAmlan Barua      else
1371ba6e06d0SAmlan Barua        NM = 1;
1372ba6e06d0SAmlan Barua 
1373ba6e06d0SAmlan Barua      j = low;
1374ba6e06d0SAmlan Barua      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1375ba6e06d0SAmlan Barua         {
1376ba6e06d0SAmlan Barua          indx1[i] = local_0_start*partial_dim + i;
1377ba6e06d0SAmlan Barua          indx2[i] = j;
1378ba6e06d0SAmlan Barua          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1379ba6e06d0SAmlan Barua          if (k%dim[ndim-1]==0)
1380ba6e06d0SAmlan Barua            { j+=NM;}
1381ba6e06d0SAmlan Barua          j++;
1382ba6e06d0SAmlan Barua         }
1383ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1384ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1385ba6e06d0SAmlan Barua 
1386ba6e06d0SAmlan Barua       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1387ba6e06d0SAmlan Barua 
1388ba6e06d0SAmlan Barua 
1389ba6e06d0SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1390ba6e06d0SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391ba6e06d0SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1392ba6e06d0SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1393b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1394b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1395b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1396b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1397bd59e6c6SAmlan Barua #endif
1398*9496c9aeSAmlan Barua  break;
13995b3e41ffSAmlan Barua  }
1400e81bb599SAmlan Barua  }
14015b3e41ffSAmlan Barua  return 0;
14025b3e41ffSAmlan Barua }
14035b3e41ffSAmlan Barua EXTERN_C_END
14045b3e41ffSAmlan Barua 
14055b3e41ffSAmlan Barua EXTERN_C_BEGIN
14065b3e41ffSAmlan Barua #undef __FUNCT__
14073c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
14083c3a9c75SAmlan Barua /*
14093c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
14103c3a9c75SAmlan Barua   via the external package FFTW
14113c3a9c75SAmlan Barua   Options Database Keys:
14123c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
14133c3a9c75SAmlan Barua 
14143c3a9c75SAmlan Barua    Level: intermediate
14153c3a9c75SAmlan Barua 
14163c3a9c75SAmlan Barua */
14173c3a9c75SAmlan Barua 
1418b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1419b2b77a04SHong Zhang {
1420b2b77a04SHong Zhang   PetscErrorCode ierr;
1421b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1422b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1423b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1424b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1425b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1426b2b77a04SHong Zhang   PetscBool      flg;
1427*9496c9aeSAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,n1;
142811902ff2SHong Zhang   PetscMPIInt    size,rank;
1429*9496c9aeSAmlan Barua   ptrdiff_t      *pdim;
1430*9496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
1431*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1432*9496c9aeSAmlan Barua    ptrdiff_t     temp;
1433*9496c9aeSAmlan Barua    PetscInt      N1;
1434*9496c9aeSAmlan Barua #endif
1435*9496c9aeSAmlan Barua 
1436b2b77a04SHong Zhang 
1437b2b77a04SHong Zhang   PetscFunctionBegin;
1438b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
143911902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
144054dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1441c92418ccSAmlan Barua 
144211902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
144311902ff2SHong Zhang   pdim[0] = dim[0];
144411902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
144511902ff2SHong Zhang      {
14466882372aSHong Zhang           partial_dim *= dim[ctr];
144711902ff2SHong Zhang           pdim[ctr] = dim[ctr];
14486882372aSHong Zhang      }
14493c3a9c75SAmlan Barua 
1450b2b77a04SHong Zhang   if (size == 1) {
1451e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1452b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1453b2b77a04SHong Zhang     n = N;
1454e81bb599SAmlan Barua #else
1455*9496c9aeSAmlan Barua     PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1456e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1457e81bb599SAmlan Barua     n = tot_dim;
1458e81bb599SAmlan Barua #endif
1459e81bb599SAmlan Barua 
1460b2b77a04SHong Zhang   } else {
1461*9496c9aeSAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1462b2b77a04SHong Zhang     switch (ndim){
1463b2b77a04SHong Zhang     case 1:
14643c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14653c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1466e5d7f247SAmlan Barua #else
1467*9496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
14686882372aSHong Zhang       n = (PetscInt)local_n0;
1469*9496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
1470*9496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1471e5d7f247SAmlan Barua #endif
1472b2b77a04SHong Zhang       break;
1473b2b77a04SHong Zhang     case 2:
14745b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1475b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1476b2b77a04SHong Zhang       /*
1477b2b77a04SHong Zhang        PetscMPIInt    rank;
1478b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1479b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1480b2b77a04SHong Zhang        */
1481b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1482b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
14835b3e41ffSAmlan Barua #else
14845b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
14855b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1486*9496c9aeSAmlan Barua       n1 = 2*(PetscInt)local_n1*(dim[0]);
1487*9496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
14885b3e41ffSAmlan Barua #endif
1489b2b77a04SHong Zhang       break;
1490b2b77a04SHong Zhang     case 3:
149151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
149251d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
14936882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
14946882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
149551d1eed7SAmlan Barua #else
149651d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
149751d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1498*9496c9aeSAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1499*9496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
150051d1eed7SAmlan Barua #endif
1501b2b77a04SHong Zhang       break;
1502b2b77a04SHong Zhang     default:
1503b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
150411902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
15056882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
15066882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1507b3a17365SAmlan Barua #else
1508b3a17365SAmlan Barua       temp = pdim[ndim-1];
1509b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1510*9496c9aeSAmlan Barua       //printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1511b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1512b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1513b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1514b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1515*9496c9aeSAmlan Barua       //printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1516*9496c9aeSAmlan Barua       temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1517*9496c9aeSAmlan Barua       n1 = 2*local_n1*temp;
1518*9496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1519b3a17365SAmlan Barua #endif
1520b2b77a04SHong Zhang       break;
1521b2b77a04SHong Zhang     }
1522b2b77a04SHong Zhang   }
1523b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1524b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1525b2b77a04SHong Zhang   fft->data = (void*)fftw;
1526b2b77a04SHong Zhang 
1527b2b77a04SHong Zhang   fft->n           = n;
1528c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1529e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1530c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1531b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1532c92418ccSAmlan Barua 
1533b2b77a04SHong Zhang   fftw->p_forward  = 0;
1534b2b77a04SHong Zhang   fftw->p_backward = 0;
1535b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1536b2b77a04SHong Zhang 
1537b2b77a04SHong Zhang   if (size == 1){
1538b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1539b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1540b2b77a04SHong Zhang   } else {
1541b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1542b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1543b2b77a04SHong Zhang   }
1544b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
15456a506ed8SAmlan Barua   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
15464be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1547b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
15484be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
15493c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
15505b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1551b2b77a04SHong Zhang 
1552b2b77a04SHong Zhang   /* get runtime options */
1553b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1554b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1555b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1556b2b77a04SHong Zhang   PetscOptionsEnd();
1557b2b77a04SHong Zhang   PetscFunctionReturn(0);
1558b2b77a04SHong Zhang }
1559b2b77a04SHong Zhang EXTERN_C_END
15603c3a9c75SAmlan Barua 
15613c3a9c75SAmlan Barua 
15623c3a9c75SAmlan Barua 
15633c3a9c75SAmlan Barua 
1564