xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 5b113e21679fe899992bf760b81c2c5ec9e4d212)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt     partial_dim;
20b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32*5b113e21Ss-sajid-ali extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
33b2b77a04SHong Zhang 
3497504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3597504071SAmlan Barua    Input parameter:
363564f093SHong Zhang      A - the matrix
3797504071SAmlan Barua      x - the vector on which FDFT will be performed
3897504071SAmlan Barua 
3997504071SAmlan Barua    Output parameter:
4097504071SAmlan Barua      y - vector that stores result of FDFT
4197504071SAmlan Barua */
42b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
43b2b77a04SHong Zhang {
44b2b77a04SHong Zhang   PetscErrorCode ierr;
45b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
46b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
47f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
48f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
491acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
50a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
51a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
52a1b6d50cSHong Zhang #else
538c1d5ab3SHong Zhang   fftw_iodim     *iodims;
54a1b6d50cSHong Zhang #endif
551acd80e4SHong Zhang   PetscInt       i;
561acd80e4SHong Zhang #endif
571acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
58b2b77a04SHong Zhang 
59b2b77a04SHong Zhang   PetscFunctionBegin;
60f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
61b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
62b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
63b2b77a04SHong Zhang     switch (ndim) {
64b2b77a04SHong Zhang     case 1:
6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
66b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6758a912c5SAmlan Barua #else
6858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6958a912c5SAmlan Barua #endif
70b2b77a04SHong Zhang       break;
71b2b77a04SHong Zhang     case 2:
7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
73b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7458a912c5SAmlan Barua #else
7558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7658a912c5SAmlan Barua #endif
77b2b77a04SHong Zhang       break;
78b2b77a04SHong Zhang     case 3:
7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
80b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8158a912c5SAmlan Barua #else
8258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8358a912c5SAmlan Barua #endif
84b2b77a04SHong Zhang       break;
85b2b77a04SHong Zhang     default:
8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87a1b6d50cSHong Zhang       iodims = fftw->iodims;
88a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
898c1d5ab3SHong Zhang       if (ndim) {
90a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
918c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
928c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
93a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
948c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
958c1d5ab3SHong Zhang         }
968c1d5ab3SHong Zhang       }
97a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98a1b6d50cSHong Zhang #else
99a1b6d50cSHong Zhang       if (ndim) {
100a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
101a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
102a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
103a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
104a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105a1b6d50cSHong Zhang         }
106a1b6d50cSHong Zhang       }
107a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108a1b6d50cSHong Zhang #endif
109a1b6d50cSHong Zhang 
11058a912c5SAmlan Barua #else
111a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11258a912c5SAmlan Barua #endif
113b2b77a04SHong Zhang       break;
114b2b77a04SHong Zhang     }
115f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
116b2b77a04SHong Zhang     fftw->foutarray = y_array;
117b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
118b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
119b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
120b2b77a04SHong Zhang   } else { /* use existing plan */
121b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1227e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1247e4bc134SDominic Meiser #else
1257e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1267e4bc134SDominic Meiser #endif
127b2b77a04SHong Zhang     } else {
128b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
129b2b77a04SHong Zhang     }
130b2b77a04SHong Zhang   }
131b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
132f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
133b2b77a04SHong Zhang   PetscFunctionReturn(0);
134b2b77a04SHong Zhang }
135b2b77a04SHong Zhang 
13697504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13797504071SAmlan Barua    Input parameter:
1383564f093SHong Zhang      A - the matrix
13997504071SAmlan Barua      x - the vector on which BDFT will be performed
14097504071SAmlan Barua 
14197504071SAmlan Barua    Output parameter:
14297504071SAmlan Barua      y - vector that stores result of BDFT
14397504071SAmlan Barua */
14497504071SAmlan Barua 
145b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
146b2b77a04SHong Zhang {
147b2b77a04SHong Zhang   PetscErrorCode ierr;
148b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
149b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
150f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
151f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
152b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1531acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
154a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
155a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
156a1b6d50cSHong Zhang #else
157a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
158a1b6d50cSHong Zhang #endif
1591acd80e4SHong Zhang #endif
160b2b77a04SHong Zhang 
161b2b77a04SHong Zhang   PetscFunctionBegin;
162f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
165b2b77a04SHong Zhang     switch (ndim) {
166b2b77a04SHong Zhang     case 1:
16758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16958a912c5SAmlan Barua #else
170e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17158a912c5SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     case 2:
17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17658a912c5SAmlan Barua #else
177e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17858a912c5SAmlan Barua #endif
179b2b77a04SHong Zhang       break;
180b2b77a04SHong Zhang     case 3:
18158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
182b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18358a912c5SAmlan Barua #else
184e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18558a912c5SAmlan Barua #endif
186b2b77a04SHong Zhang       break;
187b2b77a04SHong Zhang     default:
18858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
190a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
191a1b6d50cSHong Zhang #else
1928c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
193a1b6d50cSHong Zhang #endif
19458a912c5SAmlan Barua #else
195a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19658a912c5SAmlan Barua #endif
197b2b77a04SHong Zhang       break;
198b2b77a04SHong Zhang     }
199f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
200b2b77a04SHong Zhang     fftw->boutarray = y_array;
201b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
202b2b77a04SHong Zhang   } else { /* use existing plan */
203b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2047e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
205b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2067e4bc134SDominic Meiser #else
2077e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2087e4bc134SDominic Meiser #endif
209b2b77a04SHong Zhang     } else {
210b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
211b2b77a04SHong Zhang     }
212b2b77a04SHong Zhang   }
213b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
214f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
215b2b77a04SHong Zhang   PetscFunctionReturn(0);
216b2b77a04SHong Zhang }
217b2b77a04SHong Zhang 
21897504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
21997504071SAmlan Barua    Input parameter:
2203564f093SHong Zhang      A - the matrix
22197504071SAmlan Barua      x - the vector on which FDFT will be performed
22297504071SAmlan Barua 
22397504071SAmlan Barua    Output parameter:
22497504071SAmlan Barua    y   - vector that stores result of FDFT
22597504071SAmlan Barua */
226b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
227b2b77a04SHong Zhang {
228b2b77a04SHong Zhang   PetscErrorCode ierr;
229b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
230b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
231f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
232f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
233c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
234ce94432eSBarry Smith   MPI_Comm       comm;
235b2b77a04SHong Zhang 
236b2b77a04SHong Zhang   PetscFunctionBegin;
237ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
238f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
240b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
241b2b77a04SHong Zhang     switch (ndim) {
242b2b77a04SHong Zhang     case 1:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
245ae0a50aaSHong Zhang #else
2464f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     case 2:
25097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2523c3a9c75SAmlan Barua #else
2533c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     case 3:
2573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
258b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2593c3a9c75SAmlan Barua #else
26051d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2613c3a9c75SAmlan Barua #endif
262b2b77a04SHong Zhang       break;
263b2b77a04SHong Zhang     default:
2643c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
265c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2663c3a9c75SAmlan Barua #else
2673c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2683c3a9c75SAmlan Barua #endif
269b2b77a04SHong Zhang       break;
270b2b77a04SHong Zhang     }
271f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
272b2b77a04SHong Zhang     fftw->foutarray = y_array;
273b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
274b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
275b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
276b2b77a04SHong Zhang   } else { /* use existing plan */
277b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
278b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
279b2b77a04SHong Zhang     } else {
280b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
281b2b77a04SHong Zhang     }
282b2b77a04SHong Zhang   }
283b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
284f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
285b2b77a04SHong Zhang   PetscFunctionReturn(0);
286b2b77a04SHong Zhang }
287b2b77a04SHong Zhang 
28897504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
28997504071SAmlan Barua    Input parameter:
2903564f093SHong Zhang      A - the matrix
29197504071SAmlan Barua      x - the vector on which BDFT will be performed
29297504071SAmlan Barua 
29397504071SAmlan Barua    Output parameter:
29497504071SAmlan Barua      y - vector that stores result of BDFT
29597504071SAmlan Barua */
296b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
297b2b77a04SHong Zhang {
298b2b77a04SHong Zhang   PetscErrorCode ierr;
299b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
300b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
301f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
302f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
303c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
304ce94432eSBarry Smith   MPI_Comm       comm;
305b2b77a04SHong Zhang 
306b2b77a04SHong Zhang   PetscFunctionBegin;
307ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
308f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
310b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
311b2b77a04SHong Zhang     switch (ndim) {
312b2b77a04SHong Zhang     case 1:
3133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
314b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
315ae0a50aaSHong Zhang #else
3164f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3173c3a9c75SAmlan Barua #endif
318b2b77a04SHong Zhang       break;
319b2b77a04SHong Zhang     case 2:
32097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3223c3a9c75SAmlan Barua #else
3233c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3243c3a9c75SAmlan Barua #endif
325b2b77a04SHong Zhang       break;
326b2b77a04SHong Zhang     case 3:
3273c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
328b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3293c3a9c75SAmlan Barua #else
3303c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3313c3a9c75SAmlan Barua #endif
332b2b77a04SHong Zhang       break;
333b2b77a04SHong Zhang     default:
3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
335c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3363c3a9c75SAmlan Barua #else
3373c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3383c3a9c75SAmlan Barua #endif
339b2b77a04SHong Zhang       break;
340b2b77a04SHong Zhang     }
341f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
342b2b77a04SHong Zhang     fftw->boutarray = y_array;
343b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
344b2b77a04SHong Zhang   } else { /* use existing plan */
345b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
346b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
347b2b77a04SHong Zhang     } else {
348b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
349b2b77a04SHong Zhang     }
350b2b77a04SHong Zhang   }
351b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
352f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
353b2b77a04SHong Zhang   PetscFunctionReturn(0);
354b2b77a04SHong Zhang }
355b2b77a04SHong Zhang 
356b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
357b2b77a04SHong Zhang {
358b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
359b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
360b2b77a04SHong Zhang   PetscErrorCode ierr;
361b2b77a04SHong Zhang 
362b2b77a04SHong Zhang   PetscFunctionBegin;
363b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
364b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3658c1d5ab3SHong Zhang   if (fftw->iodims) {
3668c1d5ab3SHong Zhang     free(fftw->iodims);
3678c1d5ab3SHong Zhang   }
368bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
369bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3706ccf0b3eSHong Zhang   fftw_mpi_cleanup();
371b2b77a04SHong Zhang   PetscFunctionReturn(0);
372b2b77a04SHong Zhang }
373b2b77a04SHong Zhang 
374c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
375b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
376b2b77a04SHong Zhang {
377b2b77a04SHong Zhang   PetscErrorCode ierr;
378b2b77a04SHong Zhang   PetscScalar    *array;
379b2b77a04SHong Zhang 
380b2b77a04SHong Zhang   PetscFunctionBegin;
381b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
382b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
383b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
384b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
385b2b77a04SHong Zhang   PetscFunctionReturn(0);
386b2b77a04SHong Zhang }
387b2b77a04SHong Zhang 
388*5b113e21Ss-sajid-ali 
389*5b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
390*5b113e21Ss-sajid-ali {
391*5b113e21Ss-sajid-ali   PetscErrorCode ierr;
392*5b113e21Ss-sajid-ali   Mat            A;
393*5b113e21Ss-sajid-ali 
394*5b113e21Ss-sajid-ali   PetscFunctionBegin;
395*5b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
396*5b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr);
397*5b113e21Ss-sajid-ali   PetscFunctionReturn(0);
398*5b113e21Ss-sajid-ali }
399*5b113e21Ss-sajid-ali 
400*5b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
401*5b113e21Ss-sajid-ali {
402*5b113e21Ss-sajid-ali   PetscErrorCode ierr;
403*5b113e21Ss-sajid-ali   Mat            A;
404*5b113e21Ss-sajid-ali 
405*5b113e21Ss-sajid-ali   PetscFunctionBegin;
406*5b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
407*5b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr);
408*5b113e21Ss-sajid-ali   PetscFunctionReturn(0);
409*5b113e21Ss-sajid-ali }
410*5b113e21Ss-sajid-ali 
411*5b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
412*5b113e21Ss-sajid-ali {
413*5b113e21Ss-sajid-ali   PetscErrorCode ierr;
414*5b113e21Ss-sajid-ali   Mat            A;
415*5b113e21Ss-sajid-ali 
416*5b113e21Ss-sajid-ali   PetscFunctionBegin;
417*5b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
418*5b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr);
419*5b113e21Ss-sajid-ali   PetscFunctionReturn(0);
420*5b113e21Ss-sajid-ali }
421*5b113e21Ss-sajid-ali 
422*5b113e21Ss-sajid-ali 
4234be45526SHong Zhang /*@
4242a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
4254f7415efSAmlan Barua      parallel layout determined by FFTW
4264f7415efSAmlan Barua 
4274f7415efSAmlan Barua    Collective on Mat
4284f7415efSAmlan Barua 
4294f7415efSAmlan Barua    Input Parameter:
4303564f093SHong Zhang .   A - the matrix
4314f7415efSAmlan Barua 
4324f7415efSAmlan Barua    Output Parameter:
433cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
434cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
435cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4364f7415efSAmlan Barua 
4374f7415efSAmlan Barua   Level: advanced
4383564f093SHong Zhang 
43997504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
44097504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
44197504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
44297504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
44397504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
44497504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
445e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
446e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
447e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
448e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
449e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
450e0ec536eSAmlan Barua         each processor and returns that.
4514f7415efSAmlan Barua 
4524f7415efSAmlan Barua .seealso: MatCreateFFTW()
4534be45526SHong Zhang @*/
4542a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4554be45526SHong Zhang {
4564be45526SHong Zhang   PetscErrorCode ierr;
457b4c3921fSHong Zhang 
4584be45526SHong Zhang   PetscFunctionBegin;
459163d334eSBarry Smith   ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4604be45526SHong Zhang   PetscFunctionReturn(0);
4614be45526SHong Zhang }
4624be45526SHong Zhang 
4632a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4644f7415efSAmlan Barua {
4654f7415efSAmlan Barua   PetscErrorCode ierr;
4664f7415efSAmlan Barua   PetscMPIInt    size,rank;
467ce94432eSBarry Smith   MPI_Comm       comm;
4684f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4694f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4709496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4714f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4724f7415efSAmlan Barua 
4734f7415efSAmlan Barua   PetscFunctionBegin;
4744f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4754f7415efSAmlan Barua   PetscValidType(A,1);
476ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4774f7415efSAmlan Barua 
4784f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4794f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4804f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4814f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4824f7415efSAmlan Barua     if (fin)  {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4834f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4844f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4854f7415efSAmlan Barua #else
4868ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4878ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4888ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4894f7415efSAmlan Barua #endif
49097504071SAmlan Barua   } else { /* parallel cases */
4914f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4929496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4939496c9aeSAmlan Barua     fftw_complex *data_fout;
4949496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4959496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4969496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4979496c9aeSAmlan Barua #else
4984f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4996ccf0b3eSHong Zhang     PetscInt  n1,N1;
5009496c9aeSAmlan Barua     ptrdiff_t temp;
5019496c9aeSAmlan Barua #endif
5029496c9aeSAmlan Barua 
5034f7415efSAmlan Barua     switch (ndim) {
5044f7415efSAmlan Barua     case 1:
50557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5064f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
50757625b84SAmlan Barua #else
50857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
50957625b84SAmlan Barua       if (fin) {
51057625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
512*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
513*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
51457625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
51557625b84SAmlan Barua       }
51657625b84SAmlan Barua       if (fout) {
51757625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
518778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
519*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
520*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
52157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52257625b84SAmlan Barua       }
52357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
52457625b84SAmlan Barua       if (bout) {
52557625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
527*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
528*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
52957625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
53057625b84SAmlan Barua       }
53157625b84SAmlan Barua       break;
53257625b84SAmlan Barua #endif
5334f7415efSAmlan Barua     case 2:
53497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5354f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5364f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5374f7415efSAmlan Barua       if (fin) {
5384f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
539778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
540*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
541*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5424f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5434f7415efSAmlan Barua       }
54457625b84SAmlan Barua       if (fout) {
54557625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
546778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
547*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
548*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
54957625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55057625b84SAmlan Barua       }
551*5b113e21Ss-sajid-ali       if (bout) {
552*5b113e21Ss-sajid-ali         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
553*5b113e21Ss-sajid-ali         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
554*5b113e21Ss-sajid-ali         ierr       = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
555*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
556*5b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
557*5b113e21Ss-sajid-ali       }
5584f7415efSAmlan Barua #else
5594f7415efSAmlan Barua       /* Get local size */
5604f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5614f7415efSAmlan Barua       if (fin) {
5624f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
564*5b113e21Ss-sajid-ali         ierr     = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
565*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5664f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5674f7415efSAmlan Barua       }
5684f7415efSAmlan Barua       if (fout) {
5694f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
571*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
572*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5734f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5744f7415efSAmlan Barua       }
5754f7415efSAmlan Barua       if (bout) {
5764f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
578*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
579*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5804f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5814f7415efSAmlan Barua       }
5824f7415efSAmlan Barua #endif
5834f7415efSAmlan Barua       break;
5844f7415efSAmlan Barua     case 3:
5854f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5864f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5874f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5884f7415efSAmlan Barua       if (fin) {
5894f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
590778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
591*5b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
592*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5934f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua       }
595*5b113e21Ss-sajid-ali       if (fout) {
596*5b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
597*5b113e21Ss-sajid-ali         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
598*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
599*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
600*5b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
601*5b113e21Ss-sajid-ali       }
6024f7415efSAmlan Barua       if (bout) {
6034f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
604778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
605*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
606*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6074f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6084f7415efSAmlan Barua       }
6094f7415efSAmlan Barua #else
6100c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
6110c9b18e4SAmlan Barua       if (fin) {
6120c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
613778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
614*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
615*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6160c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6170c9b18e4SAmlan Barua       }
6180c9b18e4SAmlan Barua       if (fout) {
6190c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
620778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
621*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
622*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6230c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6240c9b18e4SAmlan Barua       }
6250c9b18e4SAmlan Barua       if (bout) {
6260c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
627778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
628*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
629*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6300c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6324f7415efSAmlan Barua #endif
6334f7415efSAmlan Barua       break;
6344f7415efSAmlan Barua     default:
6354f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6364f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
63726fbe8dcSKarl Rupp 
6384f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
63926fbe8dcSKarl Rupp 
6404f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6414f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
64226fbe8dcSKarl Rupp 
6434f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
6444f7415efSAmlan Barua 
6454f7415efSAmlan Barua       if (fin) {
6464f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
647778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
648*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
649*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6504f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6514f7415efSAmlan Barua       }
652*5b113e21Ss-sajid-ali       if (fout) {
653*5b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
654*5b113e21Ss-sajid-ali         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
655*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
656*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
657*5b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
658*5b113e21Ss-sajid-ali       }
6594f7415efSAmlan Barua       if (bout) {
6604f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
661778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
662*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
663*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6649496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6654f7415efSAmlan Barua       }
6664f7415efSAmlan Barua #else
6670c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6680c9b18e4SAmlan Barua       if (fin) {
6690c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
670778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
671*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
672*5b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6730c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6740c9b18e4SAmlan Barua       }
6750c9b18e4SAmlan Barua       if (fout) {
6760c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
677778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
678*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
679*5b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6800c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6810c9b18e4SAmlan Barua       }
6820c9b18e4SAmlan Barua       if (bout) {
6830c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
684778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
685*5b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
686*5b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6870c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6880c9b18e4SAmlan Barua       }
6894f7415efSAmlan Barua #endif
6904f7415efSAmlan Barua       break;
6914f7415efSAmlan Barua     }
6924f7415efSAmlan Barua   }
6934f7415efSAmlan Barua   PetscFunctionReturn(0);
6944f7415efSAmlan Barua }
6954f7415efSAmlan Barua 
6963564f093SHong Zhang /*@
6973564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
69854efbe56SHong Zhang 
6993564f093SHong Zhang    Collective on Mat
7003564f093SHong Zhang 
7013564f093SHong Zhang    Input Parameters:
7023564f093SHong Zhang +  A - FFTW matrix
7033564f093SHong Zhang -  x - the PETSc vector
7043564f093SHong Zhang 
7053564f093SHong Zhang    Output Parameters:
7063564f093SHong Zhang .  y - the FFTW vector
7073564f093SHong Zhang 
708b2b77a04SHong Zhang   Options Database Keys:
7093564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
710b2b77a04SHong Zhang 
711b2b77a04SHong Zhang    Level: intermediate
7123564f093SHong Zhang 
71397504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
71497504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
71597504071SAmlan Barua          zeros. This routine does that job by scattering operation.
71697504071SAmlan Barua 
7173564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
7183564f093SHong Zhang @*/
7193564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
7203564f093SHong Zhang {
7213564f093SHong Zhang   PetscErrorCode ierr;
722b2b77a04SHong Zhang 
7233564f093SHong Zhang   PetscFunctionBegin;
724163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7253564f093SHong Zhang   PetscFunctionReturn(0);
7263564f093SHong Zhang }
7273c3a9c75SAmlan Barua 
72874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
7293c3a9c75SAmlan Barua {
7303c3a9c75SAmlan Barua   PetscErrorCode ierr;
731ce94432eSBarry Smith   MPI_Comm       comm;
7323c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
7333c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7349496c9aeSAmlan Barua   PetscInt       N     =fft->N;
735b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
7369496c9aeSAmlan Barua   PetscInt       low;
7377a21806cSHong Zhang   PetscMPIInt    rank,size;
7387a21806cSHong Zhang   PetscInt       vsize,vsize1;
7397a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
7409496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7413c3a9c75SAmlan Barua   VecScatter     vecscat;
7423c3a9c75SAmlan Barua   IS             list1,list2;
7439496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7449496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
7459496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
746a31b9140SHong Zhang   PetscInt       NM;
7479496c9aeSAmlan Barua   ptrdiff_t      temp;
7489496c9aeSAmlan Barua #endif
7493c3a9c75SAmlan Barua 
7503564f093SHong Zhang   PetscFunctionBegin;
751ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
752f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
753f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
7540298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
7553c3a9c75SAmlan Barua 
7563564f093SHong Zhang   if (size==1) {
7578ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7588ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7599496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7609448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7616971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7626971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7636971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
764b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7653564f093SHong Zhang   } else {
7663c3a9c75SAmlan Barua     switch (ndim) {
7673c3a9c75SAmlan Barua     case 1:
76864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7697a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
77026fbe8dcSKarl Rupp 
7714f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7724f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
7739448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
77464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
77764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
77864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
77964657d84SAmlan Barua #else
780e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
78164657d84SAmlan Barua #endif
7823c3a9c75SAmlan Barua       break;
7833c3a9c75SAmlan Barua     case 2:
784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7857a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
78626fbe8dcSKarl Rupp 
7874f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7884f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
7899448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
790bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
791bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
793bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
794bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
795bd59e6c6SAmlan Barua #else
796c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
79726fbe8dcSKarl Rupp 
798854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
799854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
8003c3a9c75SAmlan Barua 
8013564f093SHong Zhang       if (dim[1]%2==0) {
8023c3a9c75SAmlan Barua         NM = dim[1]+2;
8033564f093SHong Zhang       } else {
8043c3a9c75SAmlan Barua         NM = dim[1]+1;
8053564f093SHong Zhang       }
8063c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
8073c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
8083c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
8093c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
81026fbe8dcSKarl Rupp 
8115b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
8123c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
8133c3a9c75SAmlan Barua         }
8143c3a9c75SAmlan Barua       }
8153c3a9c75SAmlan Barua 
8163c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8173c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8183c3a9c75SAmlan Barua 
8199448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
820f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
823b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
824b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
825b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
826b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
827bd59e6c6SAmlan Barua #endif
8289496c9aeSAmlan Barua       break;
8293c3a9c75SAmlan Barua 
8303c3a9c75SAmlan Barua     case 3:
831bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8327a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
83326fbe8dcSKarl Rupp 
8344f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
8354f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
8369448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
837bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
840bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
841bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
842bd59e6c6SAmlan Barua #else
843f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
844f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
8457a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
84626fbe8dcSKarl Rupp 
847854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
848854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
84951d1eed7SAmlan Barua 
850db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
851db4deed7SKarl Rupp       else             NM = dim[2]+1;
85251d1eed7SAmlan Barua 
85351d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
85451d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
85551d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
85651d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
85751d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
85826fbe8dcSKarl Rupp 
85951d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
86051d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
86151d1eed7SAmlan Barua           }
86251d1eed7SAmlan Barua         }
86351d1eed7SAmlan Barua       }
86451d1eed7SAmlan Barua 
86551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
86651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8679448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
86851d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
86951d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87051d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
871b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
872b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
873b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
874b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
875bd59e6c6SAmlan Barua #endif
8769496c9aeSAmlan Barua       break;
8773c3a9c75SAmlan Barua 
8783c3a9c75SAmlan Barua     default:
879bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8807a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
88126fbe8dcSKarl Rupp 
8824f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8834f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
8849448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
885bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
888bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
889bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
890bd59e6c6SAmlan Barua #else
891f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
892f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
893e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
89426fbe8dcSKarl Rupp 
895e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
89626fbe8dcSKarl Rupp 
8977a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
89826fbe8dcSKarl Rupp 
899e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
900e5d7f247SAmlan Barua 
901e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
902e5d7f247SAmlan Barua 
903854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
904854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
905e5d7f247SAmlan Barua 
906db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
907db4deed7SKarl Rupp       else                  NM = 1;
908e5d7f247SAmlan Barua 
9096971673cSAmlan Barua       j = low;
9103564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
9116971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
9126971673cSAmlan Barua         indx2[i] = j;
91326fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
9146971673cSAmlan Barua         j++;
9156971673cSAmlan Barua       }
9166971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9176971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9189448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
9196971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9206971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9216971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
922b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
923b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
924b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
925b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
926bd59e6c6SAmlan Barua #endif
9279496c9aeSAmlan Barua       break;
9283c3a9c75SAmlan Barua     }
929e81bb599SAmlan Barua   }
9303564f093SHong Zhang   PetscFunctionReturn(0);
9313c3a9c75SAmlan Barua }
9323c3a9c75SAmlan Barua 
9333564f093SHong Zhang /*@
9343564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
9353564f093SHong Zhang 
9363564f093SHong Zhang    Collective on Mat
9373564f093SHong Zhang 
9383564f093SHong Zhang     Input Parameters:
9393564f093SHong Zhang +   A - FFTW matrix
9403564f093SHong Zhang -   x - FFTW vector
9413564f093SHong Zhang 
9423564f093SHong Zhang    Output Parameters:
9433564f093SHong Zhang .  y - PETSc vector
9443564f093SHong Zhang 
9453564f093SHong Zhang    Level: intermediate
9463564f093SHong Zhang 
9473564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9483564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9493564f093SHong Zhang 
9503564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9513564f093SHong Zhang @*/
95274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9533c3a9c75SAmlan Barua {
9543c3a9c75SAmlan Barua   PetscErrorCode ierr;
9555fd66863SKarl Rupp 
9563c3a9c75SAmlan Barua   PetscFunctionBegin;
957163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9583c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9593c3a9c75SAmlan Barua }
96054efbe56SHong Zhang 
96174a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9625b3e41ffSAmlan Barua {
9635b3e41ffSAmlan Barua   PetscErrorCode ierr;
964ce94432eSBarry Smith   MPI_Comm       comm;
9655b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9665b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9679496c9aeSAmlan Barua   PetscInt       N     = fft->N;
968b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9699496c9aeSAmlan Barua   PetscInt       low;
9707a21806cSHong Zhang   PetscMPIInt    rank,size;
9717a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9729496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9735b3e41ffSAmlan Barua   VecScatter     vecscat;
9745b3e41ffSAmlan Barua   IS             list1,list2;
9759496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9769496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
9779496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
978a31b9140SHong Zhang   PetscInt       NM;
9799496c9aeSAmlan Barua   ptrdiff_t      temp;
9809496c9aeSAmlan Barua #endif
9819496c9aeSAmlan Barua 
9823564f093SHong Zhang   PetscFunctionBegin;
983ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9845b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9855b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9860298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9875b3e41ffSAmlan Barua 
988e81bb599SAmlan Barua   if (size==1) {
989b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9909448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9916971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9926971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9936971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
994b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
995e81bb599SAmlan Barua 
9963564f093SHong Zhang   } else {
997e81bb599SAmlan Barua     switch (ndim) {
998e81bb599SAmlan Barua     case 1:
99964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10007a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
100126fbe8dcSKarl Rupp 
10024f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
10034f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
10049448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
100564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
100664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
100764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
100864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
100964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
101064657d84SAmlan Barua #else
10116a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
101264657d84SAmlan Barua #endif
10135b3e41ffSAmlan Barua       break;
10145b3e41ffSAmlan Barua     case 2:
1015bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10167a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
101726fbe8dcSKarl Rupp 
10184f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
10194f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
10209448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1021bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1024bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1026bd59e6c6SAmlan Barua #else
10277a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
102826fbe8dcSKarl Rupp 
1029854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1030854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
10315b3e41ffSAmlan Barua 
1032db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
1033db4deed7SKarl Rupp       else             NM = dim[1]+1;
10345b3e41ffSAmlan Barua 
10355b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
10365b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
10375b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
10385b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
103926fbe8dcSKarl Rupp 
10405b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10415b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10425b3e41ffSAmlan Barua         }
10435b3e41ffSAmlan Barua       }
10445b3e41ffSAmlan Barua 
10455b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10465b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10475b3e41ffSAmlan Barua 
10489448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10495b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10505b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10515b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1052b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1053b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1054b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1055b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua #endif
10579496c9aeSAmlan Barua       break;
10585b3e41ffSAmlan Barua     case 3:
1059bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10607a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
106126fbe8dcSKarl Rupp 
10624f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10634f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
10649448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1065bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1067bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1068bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1069bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1070bd59e6c6SAmlan Barua #else
10717a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
107226fbe8dcSKarl Rupp 
1073854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1074854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
107551d1eed7SAmlan Barua 
1076db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1077db4deed7SKarl Rupp       else             NM = dim[2]+1;
107851d1eed7SAmlan Barua 
107951d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
108051d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
108151d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
108251d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
108351d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
108426fbe8dcSKarl Rupp 
108551d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
108651d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
108751d1eed7SAmlan Barua           }
108851d1eed7SAmlan Barua         }
108951d1eed7SAmlan Barua       }
109051d1eed7SAmlan Barua 
109151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
109251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
109351d1eed7SAmlan Barua 
10949448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
109551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
109651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
109751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1098b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1099b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1100b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1101b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1102bd59e6c6SAmlan Barua #endif
11039496c9aeSAmlan Barua       break;
11045b3e41ffSAmlan Barua     default:
1105bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11067a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
110726fbe8dcSKarl Rupp 
11084f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
11094f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
11109448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1111bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1114bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1115bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1116bd59e6c6SAmlan Barua #else
1117ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
111826fbe8dcSKarl Rupp 
1119ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
112026fbe8dcSKarl Rupp 
1121c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
112226fbe8dcSKarl Rupp 
1123ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1124ba6e06d0SAmlan Barua 
1125ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1126ba6e06d0SAmlan Barua 
1127854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1128854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1129ba6e06d0SAmlan Barua 
1130db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1131db4deed7SKarl Rupp       else                  NM = 1;
1132ba6e06d0SAmlan Barua 
1133ba6e06d0SAmlan Barua       j = low;
11343564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1135ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1136ba6e06d0SAmlan Barua         indx2[i] = j;
11373564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1138ba6e06d0SAmlan Barua         j++;
1139ba6e06d0SAmlan Barua       }
1140ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1141ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1142ba6e06d0SAmlan Barua 
11439448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1144ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1146ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1147b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1148b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1149b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1150b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1151bd59e6c6SAmlan Barua #endif
11529496c9aeSAmlan Barua       break;
11535b3e41ffSAmlan Barua     }
1154e81bb599SAmlan Barua   }
11553564f093SHong Zhang   PetscFunctionReturn(0);
11565b3e41ffSAmlan Barua }
11575b3e41ffSAmlan Barua 
11583c3a9c75SAmlan Barua /*
11593564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11603564f093SHong Zhang 
11613c3a9c75SAmlan Barua   Options Database Keys:
11623c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11633c3a9c75SAmlan Barua 
11643c3a9c75SAmlan Barua    Level: intermediate
11653c3a9c75SAmlan Barua 
11663c3a9c75SAmlan Barua */
11678cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1168b2b77a04SHong Zhang {
1169b2b77a04SHong Zhang   PetscErrorCode ierr;
1170ce94432eSBarry Smith   MPI_Comm       comm;
1171b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1172b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1173b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11745274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11755274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1176b2b77a04SHong Zhang   PetscBool      flg;
11774f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
117811902ff2SHong Zhang   PetscMPIInt    size,rank;
11799496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11809496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11819496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11829496c9aeSAmlan Barua   ptrdiff_t      temp;
11838ca4c034SAmlan Barua   PetscInt       N1,tot_dim;
11844f48bc67SAmlan Barua #else
11854f48bc67SAmlan Barua   PetscInt       n1;
11869496c9aeSAmlan Barua #endif
11879496c9aeSAmlan Barua 
1188b2b77a04SHong Zhang   PetscFunctionBegin;
1189ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1190b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
119111902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1192c92418ccSAmlan Barua 
11931878d478SAmlan Barua   fftw_mpi_init();
119411902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
119511902ff2SHong Zhang   pdim[0] = dim[0];
11968ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11978ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11988ca4c034SAmlan Barua #endif
11993564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
12006882372aSHong Zhang     partial_dim *= dim[ctr];
120111902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
12028ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1203db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1204db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
12058ca4c034SAmlan Barua #endif
12066882372aSHong Zhang   }
12073c3a9c75SAmlan Barua 
1208b2b77a04SHong Zhang   if (size == 1) {
1209e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1210b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1211b2b77a04SHong Zhang     n    = N;
1212e81bb599SAmlan Barua #else
1213e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1214e81bb599SAmlan Barua     n    = tot_dim;
1215e81bb599SAmlan Barua #endif
1216e81bb599SAmlan Barua 
1217b2b77a04SHong Zhang   } else {
12187a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1219b2b77a04SHong Zhang     switch (ndim) {
1220b2b77a04SHong Zhang     case 1:
12213c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12223c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1223e5d7f247SAmlan Barua #else
12247a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
122526fbe8dcSKarl Rupp 
12266882372aSHong Zhang       n    = (PetscInt)local_n0;
12279496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
12289496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1229e5d7f247SAmlan Barua #endif
1230b2b77a04SHong Zhang       break;
1231b2b77a04SHong Zhang     case 2:
12325b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12337a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1234b2b77a04SHong Zhang       /*
1235b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
12360ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1237b2b77a04SHong Zhang        */
1238b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1239b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12405b3e41ffSAmlan Barua #else
1241c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
124226fbe8dcSKarl Rupp 
12435b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1244c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12455b3e41ffSAmlan Barua #endif
1246b2b77a04SHong Zhang       break;
1247b2b77a04SHong Zhang     case 3:
124851d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12497a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
125026fbe8dcSKarl Rupp 
12516882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12526882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
125351d1eed7SAmlan Barua #else
1254c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
125526fbe8dcSKarl Rupp 
125651d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1257c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
125851d1eed7SAmlan Barua #endif
1259b2b77a04SHong Zhang       break;
1260b2b77a04SHong Zhang     default:
1261b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12627a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
126326fbe8dcSKarl Rupp 
12646882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12656882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1266b3a17365SAmlan Barua #else
1267b3a17365SAmlan Barua       temp = pdim[ndim-1];
126826fbe8dcSKarl Rupp 
1269b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
127026fbe8dcSKarl Rupp 
1271c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
127226fbe8dcSKarl Rupp 
1273b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1274b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
127526fbe8dcSKarl Rupp 
1276b3a17365SAmlan Barua       pdim[ndim-1] = temp;
127726fbe8dcSKarl Rupp 
1278c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1279b3a17365SAmlan Barua #endif
1280b2b77a04SHong Zhang       break;
1281b2b77a04SHong Zhang     }
1282b2b77a04SHong Zhang   }
12832277172eSMark Adams   free(pdim);
1284b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1285b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1286b2b77a04SHong Zhang   fft->data = (void*)fftw;
1287b2b77a04SHong Zhang 
1288b2b77a04SHong Zhang   fft->n            = n;
12890c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1290e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
129126fbe8dcSKarl Rupp 
12925e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12938c1d5ab3SHong Zhang   if (size == 1) {
1294a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1295a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1296a1b6d50cSHong Zhang #else
12978c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1298a1b6d50cSHong Zhang #endif
12998c1d5ab3SHong Zhang   }
130026fbe8dcSKarl Rupp 
1301b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1302c92418ccSAmlan Barua 
1303b2b77a04SHong Zhang   fftw->p_forward  = 0;
1304b2b77a04SHong Zhang   fftw->p_backward = 0;
1305b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1306b2b77a04SHong Zhang 
1307b2b77a04SHong Zhang   if (size == 1) {
1308b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1309b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1310b2b77a04SHong Zhang   } else {
1311b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1312b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1313b2b77a04SHong Zhang   }
1314b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1315b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13164a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
131726fbe8dcSKarl Rupp 
13182a7a6963SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1319bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1320bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1321b2b77a04SHong Zhang 
1322b2b77a04SHong Zhang   /* get runtime options */
1323ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
13245274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
13255274ed1bSHong Zhang   if (flg) {
13265274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
13275274ed1bSHong Zhang   }
13284a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1329b2b77a04SHong Zhang   PetscFunctionReturn(0);
1330b2b77a04SHong Zhang }
13313c3a9c75SAmlan Barua 
13323c3a9c75SAmlan Barua 
13333c3a9c75SAmlan Barua 
13343c3a9c75SAmlan Barua 
1335