xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 26fbe8dc1a3c99fb8dddfa572c8c6b3b4ce3ca53)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua    Input parameter:
303564f093SHong Zhang      A - the matrix
3197504071SAmlan Barua      x - the vector on which FDFT will be performed
3297504071SAmlan Barua 
3397504071SAmlan Barua    Output parameter:
3497504071SAmlan Barua      y - vector that stores result of FDFT
3597504071SAmlan Barua */
36b2b77a04SHong Zhang #undef __FUNCT__
37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39b2b77a04SHong Zhang {
40b2b77a04SHong Zhang   PetscErrorCode ierr;
41b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
44b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
45b2b77a04SHong Zhang 
46b2b77a04SHong Zhang   PetscFunctionBegin;
47b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
48b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
49b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
50b2b77a04SHong Zhang     switch (ndim) {
51b2b77a04SHong Zhang     case 1:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 2:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     case 3:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     default:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     }
80b2b77a04SHong Zhang     fftw->finarray  = x_array;
81b2b77a04SHong Zhang     fftw->foutarray = y_array;
82b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
83b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
84b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
85b2b77a04SHong Zhang   } else { /* use existing plan */
86b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
87b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
88b2b77a04SHong Zhang     } else {
89b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
90b2b77a04SHong Zhang     }
91b2b77a04SHong Zhang   }
92b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   PetscFunctionReturn(0);
95b2b77a04SHong Zhang }
96b2b77a04SHong Zhang 
9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
9897504071SAmlan Barua    Input parameter:
993564f093SHong Zhang      A - the matrix
10097504071SAmlan Barua      x - the vector on which BDFT will be performed
10197504071SAmlan Barua 
10297504071SAmlan Barua    Output parameter:
10397504071SAmlan Barua      y - vector that stores result of BDFT
10497504071SAmlan Barua */
10597504071SAmlan Barua 
106b2b77a04SHong Zhang #undef __FUNCT__
107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
109b2b77a04SHong Zhang {
110b2b77a04SHong Zhang   PetscErrorCode ierr;
111b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
112b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
113b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
114b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
115b2b77a04SHong Zhang 
116b2b77a04SHong Zhang   PetscFunctionBegin;
117b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
118b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
120b2b77a04SHong Zhang     switch (ndim) {
121b2b77a04SHong Zhang     case 1:
12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12458a912c5SAmlan Barua #else
125e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
12658a912c5SAmlan Barua #endif
127b2b77a04SHong Zhang       break;
128b2b77a04SHong Zhang     case 2:
12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
130b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13158a912c5SAmlan Barua #else
132e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
13358a912c5SAmlan Barua #endif
134b2b77a04SHong Zhang       break;
135b2b77a04SHong Zhang     case 3:
13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
137b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13858a912c5SAmlan Barua #else
139e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14058a912c5SAmlan Barua #endif
141b2b77a04SHong Zhang       break;
142b2b77a04SHong Zhang     default:
14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
144b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14558a912c5SAmlan Barua #else
146e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14758a912c5SAmlan Barua #endif
148b2b77a04SHong Zhang       break;
149b2b77a04SHong Zhang     }
150b2b77a04SHong Zhang     fftw->binarray  = x_array;
151b2b77a04SHong Zhang     fftw->boutarray = y_array;
152b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
153b2b77a04SHong Zhang   } else { /* use existing plan */
154b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
155b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
156b2b77a04SHong Zhang     } else {
157b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
158b2b77a04SHong Zhang     }
159b2b77a04SHong Zhang   }
160b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
161b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   PetscFunctionReturn(0);
163b2b77a04SHong Zhang }
164b2b77a04SHong Zhang 
16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
16697504071SAmlan Barua    Input parameter:
1673564f093SHong Zhang      A - the matrix
16897504071SAmlan Barua      x - the vector on which FDFT will be performed
16997504071SAmlan Barua 
17097504071SAmlan Barua    Output parameter:
17197504071SAmlan Barua    y   - vector that stores result of FDFT
17297504071SAmlan Barua */
173b2b77a04SHong Zhang #undef __FUNCT__
174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
176b2b77a04SHong Zhang {
177b2b77a04SHong Zhang   PetscErrorCode ierr;
178b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
179b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
180b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
181c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
182b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
183b2b77a04SHong Zhang 
184b2b77a04SHong Zhang   PetscFunctionBegin;
185b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
186b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
187b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
188b2b77a04SHong Zhang     switch (ndim) {
189b2b77a04SHong Zhang     case 1:
1903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
191b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
192ae0a50aaSHong Zhang #else
1934f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
1943c3a9c75SAmlan Barua #endif
195b2b77a04SHong Zhang       break;
196b2b77a04SHong Zhang     case 2:
19797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
198b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1993c3a9c75SAmlan Barua #else
2003c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2013c3a9c75SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     case 3:
2043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2063c3a9c75SAmlan Barua #else
20751d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     default:
2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
212c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
2143c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     }
218b2b77a04SHong Zhang     fftw->finarray  = x_array;
219b2b77a04SHong Zhang     fftw->foutarray = y_array;
220b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
221b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
222b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
223b2b77a04SHong Zhang   } else { /* use existing plan */
224b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
225b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
226b2b77a04SHong Zhang     } else {
227b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
228b2b77a04SHong Zhang     }
229b2b77a04SHong Zhang   }
230b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
231b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
232b2b77a04SHong Zhang   PetscFunctionReturn(0);
233b2b77a04SHong Zhang }
234b2b77a04SHong Zhang 
23597504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
23697504071SAmlan Barua    Input parameter:
2373564f093SHong Zhang      A - the matrix
23897504071SAmlan Barua      x - the vector on which BDFT will be performed
23997504071SAmlan Barua 
24097504071SAmlan Barua    Output parameter:
24197504071SAmlan Barua      y - vector that stores result of BDFT
24297504071SAmlan Barua */
243b2b77a04SHong Zhang #undef __FUNCT__
244b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
245b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
246b2b77a04SHong Zhang {
247b2b77a04SHong Zhang   PetscErrorCode ierr;
248b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
249b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
250b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
251c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
252b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
253b2b77a04SHong Zhang 
254b2b77a04SHong Zhang   PetscFunctionBegin;
255b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
256b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
257b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
258b2b77a04SHong Zhang     switch (ndim) {
259b2b77a04SHong Zhang     case 1:
2603c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
261b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
262ae0a50aaSHong Zhang #else
2634f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2643c3a9c75SAmlan Barua #endif
265b2b77a04SHong Zhang       break;
266b2b77a04SHong Zhang     case 2:
26797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
268b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2693c3a9c75SAmlan Barua #else
2703c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2713c3a9c75SAmlan Barua #endif
272b2b77a04SHong Zhang       break;
273b2b77a04SHong Zhang     case 3:
2743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
275b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2763c3a9c75SAmlan Barua #else
2773c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2783c3a9c75SAmlan Barua #endif
279b2b77a04SHong Zhang       break;
280b2b77a04SHong Zhang     default:
2813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
282c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2833c3a9c75SAmlan Barua #else
2843c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2853c3a9c75SAmlan Barua #endif
286b2b77a04SHong Zhang       break;
287b2b77a04SHong Zhang     }
288b2b77a04SHong Zhang     fftw->binarray  = x_array;
289b2b77a04SHong Zhang     fftw->boutarray = y_array;
290b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
291b2b77a04SHong Zhang   } else { /* use existing plan */
292b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
293b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
294b2b77a04SHong Zhang     } else {
295b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
296b2b77a04SHong Zhang     }
297b2b77a04SHong Zhang   }
298b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
304b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
305b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
306b2b77a04SHong Zhang {
307b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
308b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
309b2b77a04SHong Zhang   PetscErrorCode ierr;
310b2b77a04SHong Zhang 
311b2b77a04SHong Zhang   PetscFunctionBegin;
312b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
313b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
314b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
315bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3166ccf0b3eSHong Zhang   fftw_mpi_cleanup();
317b2b77a04SHong Zhang   PetscFunctionReturn(0);
318b2b77a04SHong Zhang }
319b2b77a04SHong Zhang 
320c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
321b2b77a04SHong Zhang #undef __FUNCT__
322b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
323b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
324b2b77a04SHong Zhang {
325b2b77a04SHong Zhang   PetscErrorCode ierr;
326b2b77a04SHong Zhang   PetscScalar    *array;
327b2b77a04SHong Zhang 
328b2b77a04SHong Zhang   PetscFunctionBegin;
329b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
330b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
331b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
332b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
333b2b77a04SHong Zhang   PetscFunctionReturn(0);
334b2b77a04SHong Zhang }
335b2b77a04SHong Zhang 
3364f7415efSAmlan Barua #undef __FUNCT__
3374be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3384be45526SHong Zhang /*@
339b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3404f7415efSAmlan Barua      parallel layout determined by FFTW
3414f7415efSAmlan Barua 
3424f7415efSAmlan Barua    Collective on Mat
3434f7415efSAmlan Barua 
3444f7415efSAmlan Barua    Input Parameter:
3453564f093SHong Zhang .   A - the matrix
3464f7415efSAmlan Barua 
3474f7415efSAmlan Barua    Output Parameter:
348cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
349cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
350cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3514f7415efSAmlan Barua 
3524f7415efSAmlan Barua   Level: advanced
3533564f093SHong Zhang 
35497504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
35597504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
35697504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
35797504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
35897504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
35997504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
360e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
361e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
362e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
363e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
364e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
365e0ec536eSAmlan Barua         each processor and returns that.
3664f7415efSAmlan Barua 
3674f7415efSAmlan Barua .seealso: MatCreateFFTW()
3684be45526SHong Zhang @*/
3694be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3704be45526SHong Zhang {
3714be45526SHong Zhang   PetscErrorCode ierr;
372b4c3921fSHong Zhang 
3734be45526SHong Zhang   PetscFunctionBegin;
3744be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3754be45526SHong Zhang   PetscFunctionReturn(0);
3764be45526SHong Zhang }
3774be45526SHong Zhang 
3784f7415efSAmlan Barua EXTERN_C_BEGIN
3794be45526SHong Zhang #undef __FUNCT__
3804be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3814be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3824f7415efSAmlan Barua {
3834f7415efSAmlan Barua   PetscErrorCode ierr;
3844f7415efSAmlan Barua   PetscMPIInt    size,rank;
3854f7415efSAmlan Barua   MPI_Comm       comm  = ((PetscObject)A)->comm;
3864f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
3874f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3889496c9aeSAmlan Barua   PetscInt       N     = fft->N;
3894f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
3904f7415efSAmlan Barua 
3914f7415efSAmlan Barua   PetscFunctionBegin;
3924f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3934f7415efSAmlan Barua   PetscValidType(A,1);
3944f7415efSAmlan Barua 
3954f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
3964f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
3974f7415efSAmlan Barua   if (size == 1) { /* sequential case */
3984f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3994f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4004f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4014f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4024f7415efSAmlan Barua #else
4038ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4048ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4058ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4064f7415efSAmlan Barua #endif
40797504071SAmlan Barua   } else { /* parallel cases */
4084f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4099496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4109496c9aeSAmlan Barua     fftw_complex *data_fout;
4119496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4129496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4139496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4149496c9aeSAmlan Barua #else
4154f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4166ccf0b3eSHong Zhang     PetscInt  n1,N1;
4179496c9aeSAmlan Barua     ptrdiff_t temp;
4189496c9aeSAmlan Barua #endif
4199496c9aeSAmlan Barua 
4204f7415efSAmlan Barua     switch (ndim) {
4214f7415efSAmlan Barua     case 1:
42257625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4234f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42457625b84SAmlan Barua #else
42557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42657625b84SAmlan Barua       if (fin) {
42757625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
428778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
429*26fbe8dcSKarl Rupp 
43057625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43157625b84SAmlan Barua       }
43257625b84SAmlan Barua       if (fout) {
43357625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
434778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
435*26fbe8dcSKarl Rupp 
43657625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
43757625b84SAmlan Barua       }
43857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
43957625b84SAmlan Barua       if (bout) {
44057625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
441778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
442*26fbe8dcSKarl Rupp 
44357625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44457625b84SAmlan Barua       }
44557625b84SAmlan Barua       break;
44657625b84SAmlan Barua #endif
4474f7415efSAmlan Barua     case 2:
44897504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4494f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4504f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4514f7415efSAmlan Barua       if (fin) {
4524f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
453778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
454*26fbe8dcSKarl Rupp 
4554f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4564f7415efSAmlan Barua       }
4574f7415efSAmlan Barua       if (bout) {
4584f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
459778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
460*26fbe8dcSKarl Rupp 
4614f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4624f7415efSAmlan Barua       }
46357625b84SAmlan Barua       if (fout) {
46457625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
465778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
466*26fbe8dcSKarl Rupp 
46757625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
46857625b84SAmlan Barua       }
4694f7415efSAmlan Barua #else
4704f7415efSAmlan Barua       /* Get local size */
4714f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4724f7415efSAmlan Barua       if (fin) {
4734f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
474778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
475*26fbe8dcSKarl Rupp 
4764f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4774f7415efSAmlan Barua       }
4784f7415efSAmlan Barua       if (fout) {
4794f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
480778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
481*26fbe8dcSKarl Rupp 
4824f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
4834f7415efSAmlan Barua       }
4844f7415efSAmlan Barua       if (bout) {
4854f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
486778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
487*26fbe8dcSKarl Rupp 
4884f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4894f7415efSAmlan Barua       }
4904f7415efSAmlan Barua #endif
4914f7415efSAmlan Barua       break;
4924f7415efSAmlan Barua     case 3:
4934f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4944f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4954f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4964f7415efSAmlan Barua       if (fin) {
4974f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
498778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
499*26fbe8dcSKarl Rupp 
5004f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5014f7415efSAmlan Barua       }
5024f7415efSAmlan Barua       if (bout) {
5034f7415efSAmlan Barua         data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
504778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
505*26fbe8dcSKarl Rupp 
5064f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5074f7415efSAmlan Barua       }
5083564f093SHong Zhang 
50957625b84SAmlan Barua       if (fout) {
51057625b84SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
512*26fbe8dcSKarl Rupp 
51357625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
51457625b84SAmlan Barua       }
5154f7415efSAmlan Barua #else
5160c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5170c9b18e4SAmlan Barua       if (fin) {
5180c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
519778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
520*26fbe8dcSKarl Rupp 
5210c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5220c9b18e4SAmlan Barua       }
5230c9b18e4SAmlan Barua       if (fout) {
5240c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
525778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
526*26fbe8dcSKarl Rupp 
5270c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5280c9b18e4SAmlan Barua       }
5290c9b18e4SAmlan Barua       if (bout) {
5300c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
532*26fbe8dcSKarl Rupp 
5330c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5340c9b18e4SAmlan Barua       }
5354f7415efSAmlan Barua #endif
5364f7415efSAmlan Barua       break;
5374f7415efSAmlan Barua     default:
5384f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5394f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
540*26fbe8dcSKarl Rupp 
5414f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
542*26fbe8dcSKarl Rupp 
5434f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5444f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
545*26fbe8dcSKarl Rupp 
5464f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5474f7415efSAmlan Barua 
5484f7415efSAmlan Barua       if (fin) {
5494f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
550778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
551*26fbe8dcSKarl Rupp 
5524f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5534f7415efSAmlan Barua       }
5544f7415efSAmlan Barua       if (bout) {
5554f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
556778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
557*26fbe8dcSKarl Rupp 
5589496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5594f7415efSAmlan Barua       }
5603564f093SHong Zhang 
56157625b84SAmlan Barua       if (fout) {
56257625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
564*26fbe8dcSKarl Rupp 
56557625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
56657625b84SAmlan Barua       }
5674f7415efSAmlan Barua #else
5680c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5690c9b18e4SAmlan Barua       if (fin) {
5700c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
571778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
572*26fbe8dcSKarl Rupp 
5730c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5740c9b18e4SAmlan Barua       }
5750c9b18e4SAmlan Barua       if (fout) {
5760c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
578*26fbe8dcSKarl Rupp 
5790c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5800c9b18e4SAmlan Barua       }
5810c9b18e4SAmlan Barua       if (bout) {
5820c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
583778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
584*26fbe8dcSKarl Rupp 
5850c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5860c9b18e4SAmlan Barua       }
5874f7415efSAmlan Barua #endif
5884f7415efSAmlan Barua       break;
5894f7415efSAmlan Barua     }
5904f7415efSAmlan Barua   }
5914f7415efSAmlan Barua   PetscFunctionReturn(0);
5924f7415efSAmlan Barua }
5934f7415efSAmlan Barua EXTERN_C_END
5944f7415efSAmlan Barua 
595c92418ccSAmlan Barua #undef __FUNCT__
59674a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
5973564f093SHong Zhang /*@
5983564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
59954efbe56SHong Zhang 
6003564f093SHong Zhang    Collective on Mat
6013564f093SHong Zhang 
6023564f093SHong Zhang    Input Parameters:
6033564f093SHong Zhang +  A - FFTW matrix
6043564f093SHong Zhang -  x - the PETSc vector
6053564f093SHong Zhang 
6063564f093SHong Zhang    Output Parameters:
6073564f093SHong Zhang .  y - the FFTW vector
6083564f093SHong Zhang 
609b2b77a04SHong Zhang   Options Database Keys:
6103564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
611b2b77a04SHong Zhang 
612b2b77a04SHong Zhang    Level: intermediate
6133564f093SHong Zhang 
61497504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
61597504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
61697504071SAmlan Barua          zeros. This routine does that job by scattering operation.
61797504071SAmlan Barua 
6183564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6193564f093SHong Zhang @*/
6203564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6213564f093SHong Zhang {
6223564f093SHong Zhang   PetscErrorCode ierr;
623b2b77a04SHong Zhang 
6243564f093SHong Zhang   PetscFunctionBegin;
6253564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6263564f093SHong Zhang   PetscFunctionReturn(0);
6273564f093SHong Zhang }
6283c3a9c75SAmlan Barua 
6293c3a9c75SAmlan Barua EXTERN_C_BEGIN
6303c3a9c75SAmlan Barua #undef __FUNCT__
6311986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
63274a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6333c3a9c75SAmlan Barua {
6343c3a9c75SAmlan Barua   PetscErrorCode ierr;
6353c3a9c75SAmlan Barua   MPI_Comm       comm  =((PetscObject)A)->comm;
6363c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6373c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6389496c9aeSAmlan Barua   PetscInt       N     =fft->N;
639b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6409496c9aeSAmlan Barua   PetscInt       low;
6418ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6423c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6439496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6443c3a9c75SAmlan Barua   VecScatter     vecscat;
6453c3a9c75SAmlan Barua   IS             list1,list2;
6469496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6479496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
6489496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
6499496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
6509496c9aeSAmlan Barua   ptrdiff_t temp;
6519496c9aeSAmlan Barua #endif
6523c3a9c75SAmlan Barua 
6533564f093SHong Zhang   PetscFunctionBegin;
654f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
655f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6563c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
6573c3a9c75SAmlan Barua 
6583564f093SHong Zhang   if (size==1) {
6598ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6608ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6619496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6626971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6636971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6646971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6656971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
666b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
6673564f093SHong Zhang   } else {
6683c3a9c75SAmlan Barua     switch (ndim) {
6693c3a9c75SAmlan Barua     case 1:
67064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
672*26fbe8dcSKarl Rupp 
6734f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
6744f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
67564657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
67664657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
67764657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
67864657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
67964657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
68064657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
68164657d84SAmlan Barua #else
682e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
68364657d84SAmlan Barua #endif
6843c3a9c75SAmlan Barua       break;
6853c3a9c75SAmlan Barua     case 2:
686bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
687bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
688*26fbe8dcSKarl Rupp 
6894f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
6904f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
691bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
692bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
693bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
694bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
695bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
696bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
697bd59e6c6SAmlan Barua #else
6983c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
699*26fbe8dcSKarl Rupp 
7003c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7019496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7029496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7033c3a9c75SAmlan Barua 
7043564f093SHong Zhang       if (dim[1]%2==0) {
7053c3a9c75SAmlan Barua         NM = dim[1]+2;
7063564f093SHong Zhang       } else {
7073c3a9c75SAmlan Barua         NM = dim[1]+1;
7083564f093SHong Zhang       }
7093c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7103c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7113c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7123c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
713*26fbe8dcSKarl Rupp 
7145b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7153c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7163c3a9c75SAmlan Barua         }
7173c3a9c75SAmlan Barua       }
7183c3a9c75SAmlan Barua 
7193c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7203c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7213c3a9c75SAmlan Barua 
722f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
723f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
724f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
726b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
727b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
728b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
729b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
730bd59e6c6SAmlan Barua #endif
7319496c9aeSAmlan Barua       break;
7323c3a9c75SAmlan Barua 
7333c3a9c75SAmlan Barua     case 3:
734bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
735bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
736*26fbe8dcSKarl Rupp 
7374f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7384f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
739bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
740bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
741bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
743bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
744bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
745bd59e6c6SAmlan Barua #else
74651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
747*26fbe8dcSKarl Rupp 
74851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
74951d1eed7SAmlan Barua 
7509496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7519496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
75251d1eed7SAmlan Barua 
753db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
754db4deed7SKarl Rupp       else             NM = dim[2]+1;
75551d1eed7SAmlan Barua 
75651d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
75751d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
75851d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
75951d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
76051d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
761*26fbe8dcSKarl Rupp 
76251d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
76351d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
76451d1eed7SAmlan Barua           }
76551d1eed7SAmlan Barua         }
76651d1eed7SAmlan Barua       }
76751d1eed7SAmlan Barua 
76851d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
76951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
77051d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
77151d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77251d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77351d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
774b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
775b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
776b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
777b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
778bd59e6c6SAmlan Barua #endif
7799496c9aeSAmlan Barua       break;
7803c3a9c75SAmlan Barua 
7813c3a9c75SAmlan Barua     default:
782bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
783bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
784*26fbe8dcSKarl Rupp 
7854f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
7864f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
787bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
788bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
790bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
791bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
793bd59e6c6SAmlan Barua #else
794e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
795*26fbe8dcSKarl Rupp 
796e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
797*26fbe8dcSKarl Rupp 
798e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
799*26fbe8dcSKarl Rupp 
800e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
801*26fbe8dcSKarl Rupp 
802e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
803e5d7f247SAmlan Barua 
804e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
805e5d7f247SAmlan Barua 
806e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
807e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
808e5d7f247SAmlan Barua 
809db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
810db4deed7SKarl Rupp       else                  NM = 1;
811e5d7f247SAmlan Barua 
8126971673cSAmlan Barua       j = low;
8133564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8146971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8156971673cSAmlan Barua         indx2[i] = j;
816*26fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8176971673cSAmlan Barua         j++;
8186971673cSAmlan Barua       }
8196971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8206971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8216971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8226971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8236971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8246971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
825b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
826b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
827b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
828b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
829bd59e6c6SAmlan Barua #endif
8309496c9aeSAmlan Barua       break;
8313c3a9c75SAmlan Barua     }
832e81bb599SAmlan Barua   }
8333564f093SHong Zhang   PetscFunctionReturn(0);
8343c3a9c75SAmlan Barua }
8353c3a9c75SAmlan Barua EXTERN_C_END
8363c3a9c75SAmlan Barua 
8373c3a9c75SAmlan Barua #undef __FUNCT__
83874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8393564f093SHong Zhang /*@
8403564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8413564f093SHong Zhang 
8423564f093SHong Zhang    Collective on Mat
8433564f093SHong Zhang 
8443564f093SHong Zhang     Input Parameters:
8453564f093SHong Zhang +   A - FFTW matrix
8463564f093SHong Zhang -   x - FFTW vector
8473564f093SHong Zhang 
8483564f093SHong Zhang    Output Parameters:
8493564f093SHong Zhang .  y - PETSc vector
8503564f093SHong Zhang 
8513564f093SHong Zhang    Level: intermediate
8523564f093SHong Zhang 
8533564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8543564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8553564f093SHong Zhang 
8563564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8573564f093SHong Zhang @*/
85874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8593c3a9c75SAmlan Barua {
8603c3a9c75SAmlan Barua   PetscErrorCode ierr;
8615fd66863SKarl Rupp 
8623c3a9c75SAmlan Barua   PetscFunctionBegin;
86374a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8643c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8653c3a9c75SAmlan Barua }
86654efbe56SHong Zhang 
8673c3a9c75SAmlan Barua EXTERN_C_BEGIN
8683c3a9c75SAmlan Barua #undef __FUNCT__
86974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
87074a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8715b3e41ffSAmlan Barua {
8725b3e41ffSAmlan Barua   PetscErrorCode ierr;
8735b3e41ffSAmlan Barua   MPI_Comm       comm  = ((PetscObject)A)->comm;
8745b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
8755b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8769496c9aeSAmlan Barua   PetscInt       N     = fft->N;
877b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
8789496c9aeSAmlan Barua   PetscInt       low;
8799496c9aeSAmlan Barua   PetscInt       rank,size;
8805b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8819496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8825b3e41ffSAmlan Barua   VecScatter     vecscat;
8835b3e41ffSAmlan Barua   IS             list1,list2;
8849496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8859496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
8869496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
8879496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
8889496c9aeSAmlan Barua   ptrdiff_t temp;
8899496c9aeSAmlan Barua #endif
8909496c9aeSAmlan Barua 
8913564f093SHong Zhang   PetscFunctionBegin;
8925b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8935b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
894b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
8955b3e41ffSAmlan Barua 
896e81bb599SAmlan Barua   if (size==1) {
897b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
8986971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8996971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9006971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9016971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
902b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
903e81bb599SAmlan Barua 
9043564f093SHong Zhang   } else {
905e81bb599SAmlan Barua     switch (ndim) {
906e81bb599SAmlan Barua     case 1:
90764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
90864657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
909*26fbe8dcSKarl Rupp 
9104f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9114f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
91264657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
91364657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
91464657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
91564657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
91664657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
91764657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
91864657d84SAmlan Barua #else
9196a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
92064657d84SAmlan Barua #endif
9215b3e41ffSAmlan Barua       break;
9225b3e41ffSAmlan Barua     case 2:
923bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
924bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
925*26fbe8dcSKarl Rupp 
9264f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9274f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
928bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
929bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
932bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
933bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
934bd59e6c6SAmlan Barua #else
9355b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
936*26fbe8dcSKarl Rupp 
9375b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9385b3e41ffSAmlan Barua 
9399496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9409496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9415b3e41ffSAmlan Barua 
942db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
943db4deed7SKarl Rupp       else             NM = dim[1]+1;
9445b3e41ffSAmlan Barua 
9455b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9465b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9475b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9485b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
949*26fbe8dcSKarl Rupp 
9505b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9515b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9525b3e41ffSAmlan Barua         }
9535b3e41ffSAmlan Barua       }
9545b3e41ffSAmlan Barua 
9555b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9565b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9575b3e41ffSAmlan Barua 
9585b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9595b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9605b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9615b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
962b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
963b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
964b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
965b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
966bd59e6c6SAmlan Barua #endif
9679496c9aeSAmlan Barua       break;
9685b3e41ffSAmlan Barua     case 3:
969bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
970bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
971*26fbe8dcSKarl Rupp 
9724f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
9734f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
974bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
975bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
978bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
979bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
980bd59e6c6SAmlan Barua #else
981bd59e6c6SAmlan Barua 
98251d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
983*26fbe8dcSKarl Rupp 
98451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
98551d1eed7SAmlan Barua 
9869496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9879496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
98851d1eed7SAmlan Barua 
989db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
990db4deed7SKarl Rupp       else             NM = dim[2]+1;
99151d1eed7SAmlan Barua 
99251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
99351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
99451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
99551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
99651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
997*26fbe8dcSKarl Rupp 
99851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
99951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
100051d1eed7SAmlan Barua           }
100151d1eed7SAmlan Barua         }
100251d1eed7SAmlan Barua       }
100351d1eed7SAmlan Barua 
100451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
100551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
100651d1eed7SAmlan Barua 
100751d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
100851d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
100951d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101051d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1011b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1012b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1013b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1014b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1015bd59e6c6SAmlan Barua #endif
10169496c9aeSAmlan Barua       break;
10175b3e41ffSAmlan Barua     default:
1018bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1019bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1020*26fbe8dcSKarl Rupp 
10214f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10224f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1023bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1024bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1028bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua #else
1030ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1031*26fbe8dcSKarl Rupp 
1032ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1033*26fbe8dcSKarl Rupp 
1034ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1035*26fbe8dcSKarl Rupp 
1036ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1037*26fbe8dcSKarl Rupp 
1038ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1039ba6e06d0SAmlan Barua 
1040ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1041ba6e06d0SAmlan Barua 
1042ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1043ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1044ba6e06d0SAmlan Barua 
1045db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1046db4deed7SKarl Rupp       else                  NM = 1;
1047ba6e06d0SAmlan Barua 
1048ba6e06d0SAmlan Barua       j = low;
10493564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1050ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1051ba6e06d0SAmlan Barua         indx2[i] = j;
10523564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1053ba6e06d0SAmlan Barua         j++;
1054ba6e06d0SAmlan Barua       }
1055ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1056ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1057ba6e06d0SAmlan Barua 
1058ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1059ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1060ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1062b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1063b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1064b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1065b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1066bd59e6c6SAmlan Barua #endif
10679496c9aeSAmlan Barua       break;
10685b3e41ffSAmlan Barua     }
1069e81bb599SAmlan Barua   }
10703564f093SHong Zhang   PetscFunctionReturn(0);
10715b3e41ffSAmlan Barua }
10725b3e41ffSAmlan Barua EXTERN_C_END
10735b3e41ffSAmlan Barua 
10745b3e41ffSAmlan Barua EXTERN_C_BEGIN
10755b3e41ffSAmlan Barua #undef __FUNCT__
10763c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10773c3a9c75SAmlan Barua /*
10783564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
10793564f093SHong Zhang 
10803c3a9c75SAmlan Barua   Options Database Keys:
10813c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10823c3a9c75SAmlan Barua 
10833c3a9c75SAmlan Barua    Level: intermediate
10843c3a9c75SAmlan Barua 
10853c3a9c75SAmlan Barua */
1086b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1087b2b77a04SHong Zhang {
1088b2b77a04SHong Zhang   PetscErrorCode ierr;
1089b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1090b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1091b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1092b2b77a04SHong Zhang   PetscInt       n         =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1093b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1094b2b77a04SHong Zhang   PetscBool      flg;
10954f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
109611902ff2SHong Zhang   PetscMPIInt    size,rank;
10979496c9aeSAmlan Barua   ptrdiff_t      *pdim;
10989496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10999496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11009496c9aeSAmlan Barua   ptrdiff_t temp;
11018ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11024f48bc67SAmlan Barua #else
11034f48bc67SAmlan Barua   PetscInt n1;
11049496c9aeSAmlan Barua #endif
11059496c9aeSAmlan Barua 
1106b2b77a04SHong Zhang   PetscFunctionBegin;
1107b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
110811902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1109c92418ccSAmlan Barua 
11101878d478SAmlan Barua   fftw_mpi_init();
111111902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
111211902ff2SHong Zhang   pdim[0] = dim[0];
11138ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11148ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11158ca4c034SAmlan Barua #endif
11163564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11176882372aSHong Zhang     partial_dim *= dim[ctr];
111811902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11198ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1120db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1121db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11228ca4c034SAmlan Barua #endif
11236882372aSHong Zhang   }
11243c3a9c75SAmlan Barua 
1125b2b77a04SHong Zhang   if (size == 1) {
1126e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1127b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1128b2b77a04SHong Zhang     n    = N;
1129e81bb599SAmlan Barua #else
1130e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1131e81bb599SAmlan Barua     n    = tot_dim;
1132e81bb599SAmlan Barua #endif
1133e81bb599SAmlan Barua 
1134b2b77a04SHong Zhang   } else {
11351abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1136b2b77a04SHong Zhang     switch (ndim) {
1137b2b77a04SHong Zhang     case 1:
11383c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11393c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1140e5d7f247SAmlan Barua #else
11419496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1142*26fbe8dcSKarl Rupp 
11436882372aSHong Zhang       n    = (PetscInt)local_n0;
11449496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11459496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1146e5d7f247SAmlan Barua #endif
1147b2b77a04SHong Zhang       break;
1148b2b77a04SHong Zhang     case 2:
11495b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1150b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1151b2b77a04SHong Zhang       /*
1152b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1153b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1154b2b77a04SHong Zhang        */
1155b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1156b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11575b3e41ffSAmlan Barua #else
11584f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1159*26fbe8dcSKarl Rupp 
11605b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1161c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11625b3e41ffSAmlan Barua #endif
1163b2b77a04SHong Zhang       break;
1164b2b77a04SHong Zhang     case 3:
116551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
116651d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1167*26fbe8dcSKarl Rupp 
11686882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11696882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
117051d1eed7SAmlan Barua #else
11714f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1172*26fbe8dcSKarl Rupp 
117351d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1174c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
117551d1eed7SAmlan Barua #endif
1176b2b77a04SHong Zhang       break;
1177b2b77a04SHong Zhang     default:
1178b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
117911902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1180*26fbe8dcSKarl Rupp 
11816882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
11826882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1183b3a17365SAmlan Barua #else
1184b3a17365SAmlan Barua       temp = pdim[ndim-1];
1185*26fbe8dcSKarl Rupp 
1186b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1187*26fbe8dcSKarl Rupp 
11884f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1189*26fbe8dcSKarl Rupp 
1190b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1191b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1192*26fbe8dcSKarl Rupp 
1193b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1194*26fbe8dcSKarl Rupp 
1195c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1196b3a17365SAmlan Barua #endif
1197b2b77a04SHong Zhang       break;
1198b2b77a04SHong Zhang     }
1199b2b77a04SHong Zhang   }
1200b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1201b2b77a04SHong Zhang   ierr      = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1202b2b77a04SHong Zhang   fft->data = (void*)fftw;
1203b2b77a04SHong Zhang 
1204b2b77a04SHong Zhang   fft->n            = n;
12050c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1206e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1207*26fbe8dcSKarl Rupp 
1208c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr);
1209*26fbe8dcSKarl Rupp 
1210b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1211c92418ccSAmlan Barua 
1212b2b77a04SHong Zhang   fftw->p_forward  = 0;
1213b2b77a04SHong Zhang   fftw->p_backward = 0;
1214b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1215b2b77a04SHong Zhang 
1216b2b77a04SHong Zhang   if (size == 1) {
1217b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1218b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1219b2b77a04SHong Zhang   } else {
1220b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1221b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1222b2b77a04SHong Zhang   }
1223b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1224b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12254a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
1226*26fbe8dcSKarl Rupp 
12274a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
12284a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
12294a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1230b2b77a04SHong Zhang 
1231b2b77a04SHong Zhang   /* get runtime options */
1232b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1233b2b77a04SHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1234*26fbe8dcSKarl Rupp   if (flg) fftw->p_flag = (unsigned)p_flag;
12354a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1236b2b77a04SHong Zhang   PetscFunctionReturn(0);
1237b2b77a04SHong Zhang }
1238b2b77a04SHong Zhang EXTERN_C_END
12393c3a9c75SAmlan Barua 
12403c3a9c75SAmlan Barua 
12413c3a9c75SAmlan Barua 
12423c3a9c75SAmlan Barua 
1243