xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 97504071fdc107a23ca747f96fac89fc69934dc1)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
28*97504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
29*97504071SAmlan Barua 
30*97504071SAmlan Barua    Input parameter:
31*97504071SAmlan Barua    mat - the matrix
32*97504071SAmlan Barua    x   - the vector on which FDFT will be performed
33*97504071SAmlan Barua 
34*97504071SAmlan Barua    Output parameter:
35*97504071SAmlan Barua    y   - vector that stores result of FDFT
36*97504071SAmlan Barua 
37*97504071SAmlan Barua */
38b2b77a04SHong Zhang #undef __FUNCT__
39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
41b2b77a04SHong Zhang {
42b2b77a04SHong Zhang   PetscErrorCode ierr;
43b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
44b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
45b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
46b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
47b2b77a04SHong Zhang 
48b2b77a04SHong Zhang   PetscFunctionBegin;
49b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
52b2b77a04SHong Zhang     switch (ndim){
53b2b77a04SHong Zhang     case 1:
5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5658a912c5SAmlan Barua #else
5758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5858a912c5SAmlan Barua #endif
59b2b77a04SHong Zhang       break;
60b2b77a04SHong Zhang     case 2:
6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
62b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6358a912c5SAmlan Barua #else
6458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6558a912c5SAmlan Barua #endif
66b2b77a04SHong Zhang       break;
67b2b77a04SHong Zhang     case 3:
6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
69b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7058a912c5SAmlan Barua #else
7158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7258a912c5SAmlan Barua #endif
73b2b77a04SHong Zhang       break;
74b2b77a04SHong Zhang     default:
7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
76b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7758a912c5SAmlan Barua #else
7858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7958a912c5SAmlan Barua #endif
80b2b77a04SHong Zhang       break;
81b2b77a04SHong Zhang     }
82b2b77a04SHong Zhang     fftw->finarray  = x_array;
83b2b77a04SHong Zhang     fftw->foutarray = y_array;
84b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
85b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
86b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
87b2b77a04SHong Zhang   } else { /* use existing plan */
88b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
89b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
90b2b77a04SHong Zhang     } else {
91b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
92b2b77a04SHong Zhang     }
93b2b77a04SHong Zhang   }
94b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
95b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
96b2b77a04SHong Zhang   PetscFunctionReturn(0);
97b2b77a04SHong Zhang }
98b2b77a04SHong Zhang 
99*97504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
100*97504071SAmlan Barua 
101*97504071SAmlan Barua    Input parameter:
102*97504071SAmlan Barua    mat - the matrix
103*97504071SAmlan Barua    x   - the vector on which BDFT will be performed
104*97504071SAmlan Barua 
105*97504071SAmlan Barua    Output parameter:
106*97504071SAmlan Barua    y   - vector that stores result of BDFT
107*97504071SAmlan Barua 
108*97504071SAmlan Barua */
109*97504071SAmlan Barua 
110b2b77a04SHong Zhang #undef __FUNCT__
111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
113b2b77a04SHong Zhang {
114b2b77a04SHong Zhang   PetscErrorCode ierr;
115b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
116b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
117b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
118b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
119b2b77a04SHong Zhang 
120b2b77a04SHong Zhang   PetscFunctionBegin;
121b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
122b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
123b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
124b2b77a04SHong Zhang     switch (ndim){
125b2b77a04SHong Zhang     case 1:
12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
127b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12858a912c5SAmlan Barua #else
129e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13058a912c5SAmlan Barua #endif
131b2b77a04SHong Zhang       break;
132b2b77a04SHong Zhang     case 2:
13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
134b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13558a912c5SAmlan Barua #else
136e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13758a912c5SAmlan Barua #endif
138b2b77a04SHong Zhang       break;
139b2b77a04SHong Zhang     case 3:
14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
141b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14258a912c5SAmlan Barua #else
143e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
14458a912c5SAmlan Barua #endif
145b2b77a04SHong Zhang       break;
146b2b77a04SHong Zhang     default:
14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14958a912c5SAmlan Barua #else
150e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
15158a912c5SAmlan Barua #endif
152b2b77a04SHong Zhang       break;
153b2b77a04SHong Zhang     }
154b2b77a04SHong Zhang     fftw->binarray  = x_array;
155b2b77a04SHong Zhang     fftw->boutarray = y_array;
156b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
157b2b77a04SHong Zhang   } else { /* use existing plan */
158b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
159b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
160b2b77a04SHong Zhang     } else {
161b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
162b2b77a04SHong Zhang     }
163b2b77a04SHong Zhang   }
164b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
165b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
166b2b77a04SHong Zhang   PetscFunctionReturn(0);
167b2b77a04SHong Zhang }
168b2b77a04SHong Zhang 
169*97504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
170*97504071SAmlan Barua 
171*97504071SAmlan Barua    Input parameter:
172*97504071SAmlan Barua    mat - the matrix
173*97504071SAmlan Barua    x   - the vector on which FDFT will be performed
174*97504071SAmlan Barua 
175*97504071SAmlan Barua    Output parameter:
176*97504071SAmlan Barua    y   - vector that stores result of FDFT
177*97504071SAmlan Barua 
178*97504071SAmlan Barua */
179*97504071SAmlan Barua 
180b2b77a04SHong Zhang #undef __FUNCT__
181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
183b2b77a04SHong Zhang {
184b2b77a04SHong Zhang   PetscErrorCode ierr;
185b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
186b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
187b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
188c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
189b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
190b2b77a04SHong Zhang 
191b2b77a04SHong Zhang   PetscFunctionBegin;
192b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
193b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
194b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
195b2b77a04SHong Zhang     switch (ndim){
196b2b77a04SHong Zhang     case 1:
1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
198b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
199ae0a50aaSHong Zhang #else
200ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2013c3a9c75SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     case 2:
204*97504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2063c3a9c75SAmlan Barua #else
2073c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     case 3:
2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
212b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
21451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     default:
2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
219c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2203c3a9c75SAmlan Barua #else
2213c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2223c3a9c75SAmlan Barua #endif
223b2b77a04SHong Zhang       break;
224b2b77a04SHong Zhang     }
225b2b77a04SHong Zhang     fftw->finarray  = x_array;
226b2b77a04SHong Zhang     fftw->foutarray = y_array;
227b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
228b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
229b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
230b2b77a04SHong Zhang   } else { /* use existing plan */
231b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
232b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
233b2b77a04SHong Zhang     } else {
234b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
235b2b77a04SHong Zhang     }
236b2b77a04SHong Zhang   }
237b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
238b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   PetscFunctionReturn(0);
240b2b77a04SHong Zhang }
241b2b77a04SHong Zhang 
242*97504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
243*97504071SAmlan Barua 
244*97504071SAmlan Barua    Input parameter:
245*97504071SAmlan Barua    mat - the matrix
246*97504071SAmlan Barua    x   - the vector on which BDFT will be performed
247*97504071SAmlan Barua 
248*97504071SAmlan Barua    Output parameter:
249*97504071SAmlan Barua    y   - vector that stores result of BDFT
250*97504071SAmlan Barua 
251*97504071SAmlan Barua */
252b2b77a04SHong Zhang #undef __FUNCT__
253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
255b2b77a04SHong Zhang {
256b2b77a04SHong Zhang   PetscErrorCode ierr;
257b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
258b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
259b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
260c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
261b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
262b2b77a04SHong Zhang 
263b2b77a04SHong Zhang   PetscFunctionBegin;
264b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
265b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
266b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
267b2b77a04SHong Zhang     switch (ndim){
268b2b77a04SHong Zhang     case 1:
2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
270b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
271ae0a50aaSHong Zhang #else
272ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     case 2:
276*97504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
277b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2783c3a9c75SAmlan Barua #else
2793c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2803c3a9c75SAmlan Barua #endif
281b2b77a04SHong Zhang       break;
282b2b77a04SHong Zhang     case 3:
2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
284b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2853c3a9c75SAmlan Barua #else
2863c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2873c3a9c75SAmlan Barua #endif
288b2b77a04SHong Zhang       break;
289b2b77a04SHong Zhang     default:
2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
291c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2923c3a9c75SAmlan Barua #else
2933c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2943c3a9c75SAmlan Barua #endif
295b2b77a04SHong Zhang       break;
296b2b77a04SHong Zhang     }
297b2b77a04SHong Zhang     fftw->binarray  = x_array;
298b2b77a04SHong Zhang     fftw->boutarray = y_array;
299b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
300b2b77a04SHong Zhang   } else { /* use existing plan */
301b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
302b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
303b2b77a04SHong Zhang     } else {
304b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305b2b77a04SHong Zhang     }
306b2b77a04SHong Zhang   }
307b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   PetscFunctionReturn(0);
310b2b77a04SHong Zhang }
311b2b77a04SHong Zhang 
312b2b77a04SHong Zhang #undef __FUNCT__
313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
315b2b77a04SHong Zhang {
316b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
317b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318b2b77a04SHong Zhang   PetscErrorCode ierr;
319b2b77a04SHong Zhang 
320b2b77a04SHong Zhang   PetscFunctionBegin;
321b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
322b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
323b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
324bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
325b2b77a04SHong Zhang   PetscFunctionReturn(0);
326b2b77a04SHong Zhang }
327b2b77a04SHong Zhang 
328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
329b2b77a04SHong Zhang #undef __FUNCT__
330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
332b2b77a04SHong Zhang {
333b2b77a04SHong Zhang   PetscErrorCode  ierr;
334b2b77a04SHong Zhang   PetscScalar     *array;
335b2b77a04SHong Zhang 
336b2b77a04SHong Zhang   PetscFunctionBegin;
337b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
338b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
339b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
340b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
341b2b77a04SHong Zhang   PetscFunctionReturn(0);
342b2b77a04SHong Zhang }
343b2b77a04SHong Zhang 
3443c3a9c75SAmlan Barua 
3454f7415efSAmlan Barua #undef __FUNCT__
3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3474be45526SHong Zhang /*@
348b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3494f7415efSAmlan Barua      parallel layout determined by FFTW
3504f7415efSAmlan Barua 
3514f7415efSAmlan Barua    Collective on Mat
3524f7415efSAmlan Barua 
3534f7415efSAmlan Barua    Input Parameter:
3544f7415efSAmlan Barua .  mat - the matrix
3554f7415efSAmlan Barua 
3564f7415efSAmlan Barua    Output Parameter:
3574f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3584f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
359*97504071SAmlan Barua -   bout - (optional) output vector of backward FFTW
3604f7415efSAmlan Barua 
3614f7415efSAmlan Barua   Level: advanced
362*97504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
363*97504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
364*97504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
365*97504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
366*97504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
367*97504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
368*97504071SAmlan Barua         last dimension from n to n/2+1 while invoking this routine.
3694f7415efSAmlan Barua 
3704f7415efSAmlan Barua .seealso: MatCreateFFTW()
3714be45526SHong Zhang @*/
3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3734be45526SHong Zhang {
3744be45526SHong Zhang   PetscErrorCode ierr;
375b4c3921fSHong Zhang 
3764be45526SHong Zhang   PetscFunctionBegin;
3774be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3784be45526SHong Zhang   PetscFunctionReturn(0);
3794be45526SHong Zhang }
3804be45526SHong Zhang 
3814f7415efSAmlan Barua EXTERN_C_BEGIN
3824be45526SHong Zhang #undef __FUNCT__
3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3844be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3854f7415efSAmlan Barua {
3864f7415efSAmlan Barua   PetscErrorCode ierr;
3874f7415efSAmlan Barua   PetscMPIInt    size,rank;
3884f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3894f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3904f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3919496c9aeSAmlan Barua   PetscInt       N=fft->N;
3924f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3934f7415efSAmlan Barua 
3944f7415efSAmlan Barua   PetscFunctionBegin;
3954f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3964f7415efSAmlan Barua   PetscValidType(A,1);
3974f7415efSAmlan Barua 
3984f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
3994f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4004f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4024f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4034f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4044f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4054f7415efSAmlan Barua #else
4068ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4078ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4088ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4094f7415efSAmlan Barua #endif
410*97504071SAmlan Barua   } else { /* parallel cases */
4114f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4129496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4139496c9aeSAmlan Barua     fftw_complex   *data_fout;
4149496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4169496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4179496c9aeSAmlan Barua #else
4184f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4199496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4209496c9aeSAmlan Barua     ptrdiff_t      temp;
4219496c9aeSAmlan Barua #endif
4229496c9aeSAmlan Barua 
4234f7415efSAmlan Barua     switch (ndim){
4244f7415efSAmlan Barua           case 1:
42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
42657625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42757625b84SAmlan Barua #else
42857625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42957625b84SAmlan Barua                  if (fin) {
43057625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43157625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
43257625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43357625b84SAmlan Barua                          }
43457625b84SAmlan Barua                  if (fout) {
43557625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43657625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
43757625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
43857625b84SAmlan Barua                          }
43957625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44057625b84SAmlan Barua                  if (bout) {
44157625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
44257625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
44357625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44457625b84SAmlan Barua                          }
44557625b84SAmlan Barua           break;
44657625b84SAmlan Barua #endif
4474f7415efSAmlan Barua           case 2:
448*97504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4494f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4504f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4514f7415efSAmlan Barua                  if (fin) {
4524f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4534f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4544f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4554f7415efSAmlan Barua                           }
4564f7415efSAmlan Barua                  if (bout) {
4574f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4584f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4594f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4604f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4614f7415efSAmlan Barua                           }
462c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
46357625b84SAmlan Barua                  if (fout) {
46457625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4659496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46657625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
46757625b84SAmlan Barua                            }
4684f7415efSAmlan Barua #else
4694f7415efSAmlan Barua       /* Get local size */
4704f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4714f7415efSAmlan Barua                  if (fin) {
4724f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4734f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4744f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4754f7415efSAmlan Barua                           }
4764f7415efSAmlan Barua                  if (fout) {
4774f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4784f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4794f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua                            }
4814f7415efSAmlan Barua                  if (bout) {
4824f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4834f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4844f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4854f7415efSAmlan Barua                           }
4864f7415efSAmlan Barua #endif
4874f7415efSAmlan Barua           break;
4884f7415efSAmlan Barua           case 3:
4894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4904f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4914f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4924f7415efSAmlan Barua                  if (fin) {
4934f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4944f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4954f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4964f7415efSAmlan Barua                          }
4974f7415efSAmlan Barua                  if (bout) {
4984f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4994f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5004f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5014f7415efSAmlan Barua                           }
502c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
50357625b84SAmlan Barua                  if (fout) {
50457625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
50557625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50657625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50757625b84SAmlan Barua                           }
5084f7415efSAmlan Barua #else
5090c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5100c9b18e4SAmlan Barua                  if (fin) {
5110c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5120c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5130c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5140c9b18e4SAmlan Barua                          }
5150c9b18e4SAmlan Barua                  if (fout) {
5160c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5170c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5180c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5190c9b18e4SAmlan Barua                          }
5200c9b18e4SAmlan Barua                  if (bout) {
5210c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5220c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5230c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5240c9b18e4SAmlan Barua                          }
5254f7415efSAmlan Barua #endif
5264f7415efSAmlan Barua           break;
5274f7415efSAmlan Barua           default:
5284f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5294f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5304f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5314f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5324f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5334f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5344f7415efSAmlan Barua 
5354f7415efSAmlan Barua                  if (fin) {
5364f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5374f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5384f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5394f7415efSAmlan Barua                         }
5404f7415efSAmlan Barua                  if (bout) {
5414f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5424f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5439496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5444f7415efSAmlan Barua                         }
545c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
546c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
547c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
54857625b84SAmlan Barua                  if (fout) {
54957625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
55057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55157625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55257625b84SAmlan Barua                         }
5534f7415efSAmlan Barua #else
5540c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5550c9b18e4SAmlan Barua                 if (fin) {
5560c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5570c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5580c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5590c9b18e4SAmlan Barua                        }
5600c9b18e4SAmlan Barua                 if (fout) {
5610c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5620c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5630c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5640c9b18e4SAmlan Barua                        }
5650c9b18e4SAmlan Barua                 if (bout) {
5660c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5670c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5680c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5690c9b18e4SAmlan Barua                        }
5704f7415efSAmlan Barua #endif
5714f7415efSAmlan Barua             break;
5724f7415efSAmlan Barua           }
5734f7415efSAmlan Barua   }
5744f7415efSAmlan Barua   PetscFunctionReturn(0);
5754f7415efSAmlan Barua }
5764f7415efSAmlan Barua EXTERN_C_END
5774f7415efSAmlan Barua 
578c92418ccSAmlan Barua #undef __FUNCT__
579b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
580b2b77a04SHong Zhang /*
581b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
582b2b77a04SHong Zhang      parallel layout determined by FFTW
583b2b77a04SHong Zhang 
584b2b77a04SHong Zhang    Collective on Mat
585b2b77a04SHong Zhang 
586b2b77a04SHong Zhang    Input Parameter:
587b2b77a04SHong Zhang .  mat - the matrix
588b2b77a04SHong Zhang 
589b2b77a04SHong Zhang    Output Parameter:
590b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
591b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
592b2b77a04SHong Zhang 
593b2b77a04SHong Zhang   Level: advanced
594b2b77a04SHong Zhang 
595b2b77a04SHong Zhang .seealso: MatCreateFFTW()
596b2b77a04SHong Zhang */
597b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
598b2b77a04SHong Zhang {
599b2b77a04SHong Zhang   PetscErrorCode ierr;
600b2b77a04SHong Zhang   PetscMPIInt    size,rank;
601b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
602b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
603c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6049496c9aeSAmlan Barua   PetscInt       N=fft->N;
605e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
606b2b77a04SHong Zhang 
607b2b77a04SHong Zhang   PetscFunctionBegin;
608b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
609b2b77a04SHong Zhang   PetscValidType(A,1);
610b2b77a04SHong Zhang 
611b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
612b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
613b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
614e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
615b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
616b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
617e5d7f247SAmlan Barua #else
618e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
619e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
620e81bb599SAmlan Barua #endif
621b2b77a04SHong Zhang   } else {        /* mpi case */
622b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6236882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
624b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
6259496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
62651d1eed7SAmlan Barua     double         *data_finr ;
627b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
6289496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
6299496c9aeSAmlan Barua #endif
6309496c9aeSAmlan Barua 
631c92418ccSAmlan Barua //    PetscInt ctr;
632c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
633c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
634c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
63511902ff2SHong Zhang 
636c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
637f76f76beSAmlan Barua //        {k
638c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
639c92418ccSAmlan Barua //       }
640b2b77a04SHong Zhang 
641f76f76beSAmlan Barua 
642f76f76beSAmlan Barua 
643b2b77a04SHong Zhang     switch (ndim){
644b2b77a04SHong Zhang     case 1:
6456882372aSHong Zhang       /* Get local size */
6466882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6476882372aSHong Zhang       if (fin) {
6486882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6496882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6506882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6516882372aSHong Zhang       }
6526882372aSHong Zhang       if (fout) {
6536882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
65457625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6556882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6566882372aSHong Zhang       }
657b2b77a04SHong Zhang       break;
658b2b77a04SHong Zhang     case 2:
6593c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6603c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6613c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6623c3a9c75SAmlan Barua       if (fin) {
6633c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
66454dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6653c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
66609dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6673c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6683c3a9c75SAmlan Barua       }
66957625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
67057625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
671b4c3921fSHong Zhang       //printf("The values are %ld\n",local_n1);
6723c3a9c75SAmlan Barua       if (fout) {
67309dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
67409dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6753c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6763c3a9c75SAmlan Barua       }
6773c3a9c75SAmlan Barua 
6783c3a9c75SAmlan Barua #else
679b2b77a04SHong Zhang       /* Get local size */
68064657d84SAmlan Barua      //printf("Hope this does not come here");
681b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
682b2b77a04SHong Zhang       if (fin) {
683b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
684b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
685b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
686b2b77a04SHong Zhang       }
687b2b77a04SHong Zhang       if (fout) {
688b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
689b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
690b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
691b2b77a04SHong Zhang       }
69264657d84SAmlan Barua      //printf("Hope this does not come here");
6933c3a9c75SAmlan Barua #endif
694b2b77a04SHong Zhang       break;
695b2b77a04SHong Zhang     case 3:
6966882372aSHong Zhang       /* Get local size */
6973c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
69851d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
69951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
70051d1eed7SAmlan Barua       if (fin) {
70151d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
70251d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
70351d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
70451d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
70551d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
70651d1eed7SAmlan Barua       }
70757625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
70851d1eed7SAmlan Barua       if (fout) {
70951d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
71051d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
71151d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
71251d1eed7SAmlan Barua       }
71351d1eed7SAmlan Barua 
71451d1eed7SAmlan Barua 
7153c3a9c75SAmlan Barua #else
7166882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
71711902ff2SHong Zhang //      printf("The quantity n is %d",n);
7186882372aSHong Zhang       if (fin) {
7196882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7206882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7216882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7226882372aSHong Zhang       }
7236882372aSHong Zhang       if (fout) {
7246882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7256882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7266882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7276882372aSHong Zhang       }
7283c3a9c75SAmlan Barua #endif
729b2b77a04SHong Zhang       break;
730b2b77a04SHong Zhang     default:
7316882372aSHong Zhang       /* Get local size */
7323c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
733b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
734b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
735b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
736b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
737b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
738b3a17365SAmlan Barua       if (fin) {
739b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
740b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
741b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
742b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
743b3a17365SAmlan Barua       }
74457625b84SAmlan Barua       temp = fftw->partial_dim;
74557625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
74657625b84SAmlan Barua 
74757625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
748b3a17365SAmlan Barua       if (fout) {
749b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
75057625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
751b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
752b3a17365SAmlan Barua       }
753b3a17365SAmlan Barua 
7543c3a9c75SAmlan Barua #else
755c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
7566882372aSHong Zhang       if (fin) {
7576882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7586882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7596882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7606882372aSHong Zhang       }
7616882372aSHong Zhang       if (fout) {
7626882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7636882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7646882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7656882372aSHong Zhang       }
7663c3a9c75SAmlan Barua #endif
767b2b77a04SHong Zhang       break;
768b2b77a04SHong Zhang     }
769b2b77a04SHong Zhang   }
77054dd5118SAmlan Barua //  if (fin){
77154dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
77254dd5118SAmlan Barua //  }
77354dd5118SAmlan Barua //  if (fout){
77454dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
77554dd5118SAmlan Barua //  }
776b2b77a04SHong Zhang   PetscFunctionReturn(0);
777b2b77a04SHong Zhang }
778b2b77a04SHong Zhang 
779b2b77a04SHong Zhang #undef __FUNCT__
78074a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
78174a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
7823c3a9c75SAmlan Barua {
7833c3a9c75SAmlan Barua   PetscErrorCode ierr;
7843c3a9c75SAmlan Barua   PetscFunctionBegin;
78574a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7863c3a9c75SAmlan Barua   PetscFunctionReturn(0);
7873c3a9c75SAmlan Barua }
78854efbe56SHong Zhang 
789b2b77a04SHong Zhang /*
790*97504071SAmlan Barua       VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block.
7913c3a9c75SAmlan Barua   Input A, x, y
7923c3a9c75SAmlan Barua   A - FFTW matrix
79354dd5118SAmlan Barua   x - user data
794b2b77a04SHong Zhang   Options Database Keys:
795b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
796b2b77a04SHong Zhang 
797b2b77a04SHong Zhang    Level: intermediate
798*97504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
799*97504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
800*97504071SAmlan Barua          zeros. This routine does that job by scattering operation.
801*97504071SAmlan Barua 
802b2b77a04SHong Zhang 
803b2b77a04SHong Zhang */
8043c3a9c75SAmlan Barua 
8053c3a9c75SAmlan Barua EXTERN_C_BEGIN
8063c3a9c75SAmlan Barua #undef __FUNCT__
80774a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
80874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
8093c3a9c75SAmlan Barua {
8103c3a9c75SAmlan Barua   PetscErrorCode ierr;
8113c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8123c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8133c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8149496c9aeSAmlan Barua   PetscInt       N=fft->N;
815b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
8169496c9aeSAmlan Barua   PetscInt       low;
8178ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
8183c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8199496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8203c3a9c75SAmlan Barua   VecScatter     vecscat;
8213c3a9c75SAmlan Barua   IS             list1,list2;
8229496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8239496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8249496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8259496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8269496c9aeSAmlan Barua   ptrdiff_t      temp;
8279496c9aeSAmlan Barua #endif
8283c3a9c75SAmlan Barua 
829f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
830f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8313c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
8323c3a9c75SAmlan Barua 
833e81bb599SAmlan Barua   if (size==1)
834e81bb599SAmlan Barua     {
8358ca4c034SAmlan Barua           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
8368ca4c034SAmlan Barua           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
837b4c3921fSHong Zhang           //printf("The values of sizes are %d and %d",vsize,vsize1);
8389496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
8396971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8406971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8416971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8426971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
843b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
844e81bb599SAmlan Barua     }
845e81bb599SAmlan Barua 
846e81bb599SAmlan Barua  else{
847e81bb599SAmlan Barua 
8483c3a9c75SAmlan Barua  switch (ndim){
8493c3a9c75SAmlan Barua  case 1:
85064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
85164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
85264657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
85364657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
85464657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
85564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
85664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
85764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
85864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
85964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
86064657d84SAmlan Barua #else
861e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
86264657d84SAmlan Barua #endif
8633c3a9c75SAmlan Barua  break;
8643c3a9c75SAmlan Barua  case 2:
865bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
866bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
867bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
868bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
869bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
870bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
873bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
874bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
875bd59e6c6SAmlan Barua #else
8763c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
8773c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
8789496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
8799496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
8809496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
8819496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
8823c3a9c75SAmlan Barua 
8833c3a9c75SAmlan Barua       if (dim[1]%2==0)
8843c3a9c75SAmlan Barua         NM = dim[1]+2;
8853c3a9c75SAmlan Barua       else
8863c3a9c75SAmlan Barua         NM = dim[1]+1;
8873c3a9c75SAmlan Barua 
8883c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
8893c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
8903c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
8913c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
8925b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
8933c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
8943c3a9c75SAmlan Barua         }
8953c3a9c75SAmlan Barua      }
8963c3a9c75SAmlan Barua 
8973c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8983c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8993c3a9c75SAmlan Barua 
900f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
901f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
904b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
905b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
906b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
907b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
908bd59e6c6SAmlan Barua #endif
9099496c9aeSAmlan Barua  break;
9103c3a9c75SAmlan Barua 
9113c3a9c75SAmlan Barua  case 3:
912bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
913bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
914bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
915bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
916bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
917bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
920bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
921bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
922bd59e6c6SAmlan Barua #else
92351d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
92451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
92551d1eed7SAmlan Barua 
9269496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9279496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
92851d1eed7SAmlan Barua 
92951d1eed7SAmlan Barua       if (dim[2]%2==0)
93051d1eed7SAmlan Barua         NM = dim[2]+2;
93151d1eed7SAmlan Barua       else
93251d1eed7SAmlan Barua         NM = dim[2]+1;
93351d1eed7SAmlan Barua 
93451d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
93551d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
93651d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
93751d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
93851d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
93951d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
94051d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
94151d1eed7SAmlan Barua             }
94251d1eed7SAmlan Barua          }
94351d1eed7SAmlan Barua       }
94451d1eed7SAmlan Barua 
94551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
94651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
94751d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
94851d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94951d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95051d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
951b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
952b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
953b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
954b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
955bd59e6c6SAmlan Barua #endif
9569496c9aeSAmlan Barua  break;
9573c3a9c75SAmlan Barua 
9583c3a9c75SAmlan Barua  default:
959bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
960bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
961bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
962bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
963bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
964bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
967bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
968bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
969bd59e6c6SAmlan Barua #else
970e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
971e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
972e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
973e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
974e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
975e5d7f247SAmlan Barua 
976e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
977e5d7f247SAmlan Barua 
978e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
979e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
980e5d7f247SAmlan Barua 
981e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
982ba6e06d0SAmlan Barua         NM = 2;
983e5d7f247SAmlan Barua       else
984ba6e06d0SAmlan Barua         NM = 1;
985e5d7f247SAmlan Barua 
9866971673cSAmlan Barua       j = low;
9876971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
9886971673cSAmlan Barua          {
9896971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
9906971673cSAmlan Barua           indx2[i] = j;
9916971673cSAmlan Barua           if (k%dim[ndim-1]==0)
9926971673cSAmlan Barua             { j+=NM;}
9936971673cSAmlan Barua           j++;
9946971673cSAmlan Barua          }
9956971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9966971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9976971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
9986971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9996971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10006971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1001b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1002b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1003b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1004b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1005bd59e6c6SAmlan Barua #endif
10069496c9aeSAmlan Barua  break;
10073c3a9c75SAmlan Barua   }
1008e81bb599SAmlan Barua  }
10091abc6020SAmlan Barua  return(0);
10103c3a9c75SAmlan Barua }
10113c3a9c75SAmlan Barua EXTERN_C_END
10123c3a9c75SAmlan Barua 
10133c3a9c75SAmlan Barua #undef __FUNCT__
101474a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
101574a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
10163c3a9c75SAmlan Barua {
10173c3a9c75SAmlan Barua   PetscErrorCode ierr;
10183c3a9c75SAmlan Barua   PetscFunctionBegin;
101974a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
10203c3a9c75SAmlan Barua   PetscFunctionReturn(0);
10213c3a9c75SAmlan Barua }
102254efbe56SHong Zhang 
10235b3e41ffSAmlan Barua /*
1024*97504071SAmlan Barua       VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use.
10255b3e41ffSAmlan Barua   Input A, x, y
10265b3e41ffSAmlan Barua   A - FFTW matrix
10275b3e41ffSAmlan Barua   x - FFTW vector
10285b3e41ffSAmlan Barua   y - PETSc vector that user can read
10295b3e41ffSAmlan Barua 
10305b3e41ffSAmlan Barua    Level: intermediate
1031*97504071SAmlan Barua    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
1032*97504071SAmlan Barua          VecScatterFFTWToPetsc removes those extra zeros.
10335b3e41ffSAmlan Barua 
10343c3a9c75SAmlan Barua */
10353c3a9c75SAmlan Barua 
10363c3a9c75SAmlan Barua EXTERN_C_BEGIN
10373c3a9c75SAmlan Barua #undef __FUNCT__
103874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
103974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
10405b3e41ffSAmlan Barua {
10415b3e41ffSAmlan Barua   PetscErrorCode ierr;
10425b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
10435b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
10445b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
10459496c9aeSAmlan Barua   PetscInt       N=fft->N;
1046b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
10479496c9aeSAmlan Barua   PetscInt       low;
10489496c9aeSAmlan Barua   PetscInt       rank,size;
10495b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
10509496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10515b3e41ffSAmlan Barua   VecScatter     vecscat;
10525b3e41ffSAmlan Barua   IS             list1,list2;
10539496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10549496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
10559496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
10569496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
10579496c9aeSAmlan Barua   ptrdiff_t      temp;
10589496c9aeSAmlan Barua #endif
10599496c9aeSAmlan Barua 
10605b3e41ffSAmlan Barua 
10615b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
10625b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1063b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
10645b3e41ffSAmlan Barua 
1065e81bb599SAmlan Barua   if (size==1){
1066b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
10676971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
10686971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10696971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10706971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1071b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1072e81bb599SAmlan Barua   }
1073e81bb599SAmlan Barua   else{
1074e81bb599SAmlan Barua 
1075e81bb599SAmlan Barua   switch (ndim){
1076e81bb599SAmlan Barua   case 1:
107764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107864657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
10799496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
10809496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
108164657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
108264657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
108364657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
108464657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
108564657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
108664657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
108764657d84SAmlan Barua #else
10886a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
108964657d84SAmlan Barua #endif
10905b3e41ffSAmlan Barua   break;
10915b3e41ffSAmlan Barua   case 2:
1092bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1093bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1094bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1095bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1096bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1097bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1098bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1100bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1101bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1102bd59e6c6SAmlan Barua #else
11035b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
11045b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
11055b3e41ffSAmlan Barua 
11069496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
11079496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
11085b3e41ffSAmlan Barua 
11095b3e41ffSAmlan Barua       if (dim[1]%2==0)
11105b3e41ffSAmlan Barua         NM = dim[1]+2;
11115b3e41ffSAmlan Barua       else
11125b3e41ffSAmlan Barua         NM = dim[1]+1;
11135b3e41ffSAmlan Barua 
11145b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
11155b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
11165b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
11175b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
11185b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
11195b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
11205b3e41ffSAmlan Barua          }
11215b3e41ffSAmlan Barua        }
11225b3e41ffSAmlan Barua 
11235b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
11245b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
11255b3e41ffSAmlan Barua 
11265b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
11275b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11285b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11295b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1130b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1131b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1132b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1133b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1134bd59e6c6SAmlan Barua #endif
11359496c9aeSAmlan Barua   break;
11365b3e41ffSAmlan Barua   case 3:
1137bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1138bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1139bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1140bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1141bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1142bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1145bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1146bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1147bd59e6c6SAmlan Barua #else
1148bd59e6c6SAmlan Barua 
114951d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
115051d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
115151d1eed7SAmlan Barua 
11529496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
11539496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
11549496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
11559496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
115651d1eed7SAmlan Barua 
115751d1eed7SAmlan Barua        if (dim[2]%2==0)
115851d1eed7SAmlan Barua         NM = dim[2]+2;
115951d1eed7SAmlan Barua        else
116051d1eed7SAmlan Barua         NM = dim[2]+1;
116151d1eed7SAmlan Barua 
116251d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
116351d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
116451d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
116551d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
116651d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
116751d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
116851d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
116951d1eed7SAmlan Barua              }
117051d1eed7SAmlan Barua           }
117151d1eed7SAmlan Barua        }
117251d1eed7SAmlan Barua 
117351d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
117451d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
117551d1eed7SAmlan Barua 
117651d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
117751d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
117851d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
117951d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1180b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1181b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1182b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1183b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1184bd59e6c6SAmlan Barua #endif
11859496c9aeSAmlan Barua   break;
11865b3e41ffSAmlan Barua   default:
1187bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1188bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1189bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1190bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1191bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1192bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1193bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1194bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1195bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1196bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1197bd59e6c6SAmlan Barua #else
1198ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1199ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1200ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1201ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1202ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1203ba6e06d0SAmlan Barua 
1204ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1205ba6e06d0SAmlan Barua 
1206ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1207ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1208ba6e06d0SAmlan Barua 
1209ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1210ba6e06d0SAmlan Barua          NM = 2;
1211ba6e06d0SAmlan Barua        else
1212ba6e06d0SAmlan Barua          NM = 1;
1213ba6e06d0SAmlan Barua 
1214ba6e06d0SAmlan Barua        j = low;
1215ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1216ba6e06d0SAmlan Barua           {
1217ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1218ba6e06d0SAmlan Barua            indx2[i] = j;
1219ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1220ba6e06d0SAmlan Barua              { j+=NM;}
1221ba6e06d0SAmlan Barua            j++;
1222ba6e06d0SAmlan Barua           }
1223ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1224ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1225ba6e06d0SAmlan Barua 
1226ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1227ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1230b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1231b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1232b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1233b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1234bd59e6c6SAmlan Barua #endif
12359496c9aeSAmlan Barua   break;
12365b3e41ffSAmlan Barua   }
1237e81bb599SAmlan Barua   }
12385b3e41ffSAmlan Barua   return 0;
12395b3e41ffSAmlan Barua }
12405b3e41ffSAmlan Barua EXTERN_C_END
12415b3e41ffSAmlan Barua 
12425b3e41ffSAmlan Barua EXTERN_C_BEGIN
12435b3e41ffSAmlan Barua #undef __FUNCT__
12443c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
12453c3a9c75SAmlan Barua /*
12463c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
12473c3a9c75SAmlan Barua   via the external package FFTW
12483c3a9c75SAmlan Barua   Options Database Keys:
12493c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
12503c3a9c75SAmlan Barua 
12513c3a9c75SAmlan Barua    Level: intermediate
12523c3a9c75SAmlan Barua 
12533c3a9c75SAmlan Barua */
12543c3a9c75SAmlan Barua 
1255b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1256b2b77a04SHong Zhang {
1257b2b77a04SHong Zhang   PetscErrorCode ierr;
1258b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1259b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1260b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1261b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1262b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1263b2b77a04SHong Zhang   PetscBool      flg;
12644f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
126511902ff2SHong Zhang   PetscMPIInt    size,rank;
12669496c9aeSAmlan Barua   ptrdiff_t      *pdim;
12679496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
12689496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12699496c9aeSAmlan Barua    ptrdiff_t     temp;
12708ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
12714f48bc67SAmlan Barua #else
12724f48bc67SAmlan Barua    PetscInt n1;
12739496c9aeSAmlan Barua #endif
12749496c9aeSAmlan Barua 
1275b2b77a04SHong Zhang   PetscFunctionBegin;
1276b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
127711902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1278c92418ccSAmlan Barua 
127911902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
128011902ff2SHong Zhang   pdim[0] = dim[0];
12818ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12828ca4c034SAmlan Barua   tot_dim = 2*dim[0];
12838ca4c034SAmlan Barua #endif
128411902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
128511902ff2SHong Zhang      {
12866882372aSHong Zhang           partial_dim *= dim[ctr];
128711902ff2SHong Zhang           pdim[ctr] = dim[ctr];
12888ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12898ca4c034SAmlan Barua           if (ctr==ndim-1)
12908ca4c034SAmlan Barua             tot_dim *= (dim[ctr]/2+1);
12918ca4c034SAmlan Barua           else
12928ca4c034SAmlan Barua             tot_dim *= dim[ctr];
12938ca4c034SAmlan Barua #endif
12946882372aSHong Zhang      }
12953c3a9c75SAmlan Barua 
1296b2b77a04SHong Zhang   if (size == 1) {
1297e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1298b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1299b2b77a04SHong Zhang     n = N;
1300e81bb599SAmlan Barua #else
1301e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1302e81bb599SAmlan Barua     n = tot_dim;
1303e81bb599SAmlan Barua #endif
1304e81bb599SAmlan Barua 
1305b2b77a04SHong Zhang   } else {
13061abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1307b2b77a04SHong Zhang     switch (ndim){
1308b2b77a04SHong Zhang     case 1:
13093c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
13103c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1311e5d7f247SAmlan Barua #else
13129496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
13136882372aSHong Zhang       n = (PetscInt)local_n0;
13149496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
13159496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1316e5d7f247SAmlan Barua #endif
1317b2b77a04SHong Zhang       break;
1318b2b77a04SHong Zhang     case 2:
13195b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1320b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1321b2b77a04SHong Zhang       /*
1322b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1323b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1324b2b77a04SHong Zhang        */
1325b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1326b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
13275b3e41ffSAmlan Barua #else
13285b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
13295b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1330c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
13315b3e41ffSAmlan Barua #endif
1332b2b77a04SHong Zhang       break;
1333b2b77a04SHong Zhang     case 3:
133451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
133551d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
13366882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
13376882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
133851d1eed7SAmlan Barua #else
133951d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
134051d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1341c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
134251d1eed7SAmlan Barua #endif
1343b2b77a04SHong Zhang       break;
1344b2b77a04SHong Zhang     default:
1345b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
134611902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
13476882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
13486882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1349b3a17365SAmlan Barua #else
1350b3a17365SAmlan Barua       temp = pdim[ndim-1];
1351b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1352b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1353b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1354b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1355b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1356c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1357b3a17365SAmlan Barua #endif
1358b2b77a04SHong Zhang       break;
1359b2b77a04SHong Zhang     }
1360b2b77a04SHong Zhang   }
1361b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1362b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1363b2b77a04SHong Zhang   fft->data = (void*)fftw;
1364b2b77a04SHong Zhang 
1365b2b77a04SHong Zhang   fft->n           = n;
1366c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1367e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1368c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1369b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1370c92418ccSAmlan Barua 
1371b2b77a04SHong Zhang   fftw->p_forward  = 0;
1372b2b77a04SHong Zhang   fftw->p_backward = 0;
1373b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1374b2b77a04SHong Zhang 
1375b2b77a04SHong Zhang   if (size == 1){
1376b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1377b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1378b2b77a04SHong Zhang   } else {
1379b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1380b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1381b2b77a04SHong Zhang   }
1382b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
13834be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1384b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
13854be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
138674a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
138774a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1388b2b77a04SHong Zhang 
1389b2b77a04SHong Zhang   /* get runtime options */
1390b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1391b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1392b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1393b2b77a04SHong Zhang   PetscOptionsEnd();
1394b2b77a04SHong Zhang   PetscFunctionReturn(0);
1395b2b77a04SHong Zhang }
1396b2b77a04SHong Zhang EXTERN_C_END
13973c3a9c75SAmlan Barua 
13983c3a9c75SAmlan Barua 
13993c3a9c75SAmlan Barua 
14003c3a9c75SAmlan Barua 
1401