xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 0cf2e031d06cfcbb918fa74e38968d72ac843b23)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10c6db04a5SJed Brown #include <fftw3-mpi.h>
11*0cf2e031SBarry Smith #else
12*0cf2e031SBarry Smith #include <fftw3.h>
13*0cf2e031SBarry Smith #endif
14b2b77a04SHong Zhang EXTERN_C_END
15b2b77a04SHong Zhang 
16b2b77a04SHong Zhang typedef struct {
17b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim   *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
27b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
28b2b77a04SHong Zhang } Mat_FFTW;
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
32*0cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
33*0cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38*0cf2e031SBarry Smith #endif
39b2b77a04SHong Zhang 
40*0cf2e031SBarry Smith /*
41*0cf2e031SBarry Smith    MatMult_SeqFFTW performs forward DFT
4297504071SAmlan Barua    Input parameter:
433564f093SHong Zhang      A - the matrix
4497504071SAmlan Barua      x - the vector on which FDFT will be performed
4597504071SAmlan Barua 
4697504071SAmlan Barua    Output parameter:
4797504071SAmlan Barua      y - vector that stores result of FDFT
4897504071SAmlan Barua */
49b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
50b2b77a04SHong Zhang {
51b2b77a04SHong Zhang   PetscErrorCode ierr;
52b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
53b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
54f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
55f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
57a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
58a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
59a1b6d50cSHong Zhang #else
608c1d5ab3SHong Zhang   fftw_iodim     *iodims;
61a1b6d50cSHong Zhang #endif
621acd80e4SHong Zhang   PetscInt       i;
631acd80e4SHong Zhang #endif
641acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
65b2b77a04SHong Zhang 
66b2b77a04SHong Zhang   PetscFunctionBegin;
67f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
68b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
69b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
70b2b77a04SHong Zhang     switch (ndim) {
71b2b77a04SHong Zhang     case 1:
7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
73b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7458a912c5SAmlan Barua #else
7558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7658a912c5SAmlan Barua #endif
77b2b77a04SHong Zhang       break;
78b2b77a04SHong Zhang     case 2:
7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
80b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8158a912c5SAmlan Barua #else
8258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8358a912c5SAmlan Barua #endif
84b2b77a04SHong Zhang       break;
85b2b77a04SHong Zhang     case 3:
8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8858a912c5SAmlan Barua #else
8958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
9058a912c5SAmlan Barua #endif
91b2b77a04SHong Zhang       break;
92b2b77a04SHong Zhang     default:
9358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
94a1b6d50cSHong Zhang       iodims = fftw->iodims;
95a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
968c1d5ab3SHong Zhang       if (ndim) {
97a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
988c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
998c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
100a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
1018c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
1028c1d5ab3SHong Zhang         }
1038c1d5ab3SHong Zhang       }
104a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
105a1b6d50cSHong Zhang #else
106a1b6d50cSHong Zhang       if (ndim) {
107a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
108a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
109a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
110a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
111a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
112a1b6d50cSHong Zhang         }
113a1b6d50cSHong Zhang       }
114a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
115a1b6d50cSHong Zhang #endif
116a1b6d50cSHong Zhang 
11758a912c5SAmlan Barua #else
118a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11958a912c5SAmlan Barua #endif
120b2b77a04SHong Zhang       break;
121b2b77a04SHong Zhang     }
122f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
123b2b77a04SHong Zhang     fftw->foutarray = y_array;
124b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
125b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
126b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
127b2b77a04SHong Zhang   } else { /* use existing plan */
128b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1297e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
130b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1317e4bc134SDominic Meiser #else
1327e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1337e4bc134SDominic Meiser #endif
134b2b77a04SHong Zhang     } else {
135b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
136b2b77a04SHong Zhang     }
137b2b77a04SHong Zhang   }
138b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
139f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
140b2b77a04SHong Zhang   PetscFunctionReturn(0);
141b2b77a04SHong Zhang }
142b2b77a04SHong Zhang 
14397504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
14497504071SAmlan Barua    Input parameter:
1453564f093SHong Zhang      A - the matrix
14697504071SAmlan Barua      x - the vector on which BDFT will be performed
14797504071SAmlan Barua 
14897504071SAmlan Barua    Output parameter:
14997504071SAmlan Barua      y - vector that stores result of BDFT
15097504071SAmlan Barua */
15197504071SAmlan Barua 
152b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
153b2b77a04SHong Zhang {
154b2b77a04SHong Zhang   PetscErrorCode ierr;
155b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
158f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
159b2b77a04SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
1601acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
161a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
162a1b6d50cSHong Zhang   fftw_iodim64   *iodims = fftw->iodims;
163a1b6d50cSHong Zhang #else
164a1b6d50cSHong Zhang   fftw_iodim     *iodims = fftw->iodims;
165a1b6d50cSHong Zhang #endif
1661acd80e4SHong Zhang #endif
167b2b77a04SHong Zhang 
168b2b77a04SHong Zhang   PetscFunctionBegin;
169f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
170b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
171b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
172b2b77a04SHong Zhang     switch (ndim) {
173b2b77a04SHong Zhang     case 1:
17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17658a912c5SAmlan Barua #else
177e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17858a912c5SAmlan Barua #endif
179b2b77a04SHong Zhang       break;
180b2b77a04SHong Zhang     case 2:
18158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
182b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18358a912c5SAmlan Barua #else
184e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18558a912c5SAmlan Barua #endif
186b2b77a04SHong Zhang       break;
187b2b77a04SHong Zhang     case 3:
18858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
19058a912c5SAmlan Barua #else
191e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19258a912c5SAmlan Barua #endif
193b2b77a04SHong Zhang       break;
194b2b77a04SHong Zhang     default:
19558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
196a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
197a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
198a1b6d50cSHong Zhang #else
1998c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
200a1b6d50cSHong Zhang #endif
20158a912c5SAmlan Barua #else
202a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
20358a912c5SAmlan Barua #endif
204b2b77a04SHong Zhang       break;
205b2b77a04SHong Zhang     }
206f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
207b2b77a04SHong Zhang     fftw->boutarray = y_array;
2082f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
209b2b77a04SHong Zhang   } else { /* use existing plan */
210b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2117e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
212b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2137e4bc134SDominic Meiser #else
2147e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2157e4bc134SDominic Meiser #endif
216b2b77a04SHong Zhang     } else {
2172f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
218b2b77a04SHong Zhang     }
219b2b77a04SHong Zhang   }
220b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
221f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
222b2b77a04SHong Zhang   PetscFunctionReturn(0);
223b2b77a04SHong Zhang }
224b2b77a04SHong Zhang 
225*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
22697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
22797504071SAmlan Barua    Input parameter:
2283564f093SHong Zhang      A - the matrix
22997504071SAmlan Barua      x - the vector on which FDFT will be performed
23097504071SAmlan Barua 
23197504071SAmlan Barua    Output parameter:
23297504071SAmlan Barua    y   - vector that stores result of FDFT
23397504071SAmlan Barua */
234b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
235b2b77a04SHong Zhang {
236b2b77a04SHong Zhang   PetscErrorCode ierr;
237b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
238b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
239f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
240f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
241c92418ccSAmlan Barua   PetscInt       ndim = fft->ndim,*dim = fft->dim;
242ce94432eSBarry Smith   MPI_Comm       comm;
243b2b77a04SHong Zhang 
244b2b77a04SHong Zhang   PetscFunctionBegin;
245ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
246f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
247b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
248b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
249b2b77a04SHong Zhang     switch (ndim) {
250b2b77a04SHong Zhang     case 1:
2513c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
252b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
253ae0a50aaSHong Zhang #else
2544f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2553c3a9c75SAmlan Barua #endif
256b2b77a04SHong Zhang       break;
257b2b77a04SHong Zhang     case 2:
25897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
259b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2603c3a9c75SAmlan Barua #else
2613c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2623c3a9c75SAmlan Barua #endif
263b2b77a04SHong Zhang       break;
264b2b77a04SHong Zhang     case 3:
2653c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
266b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2673c3a9c75SAmlan Barua #else
26851d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2693c3a9c75SAmlan Barua #endif
270b2b77a04SHong Zhang       break;
271b2b77a04SHong Zhang     default:
2723c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
273c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2743c3a9c75SAmlan Barua #else
2753c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2763c3a9c75SAmlan Barua #endif
277b2b77a04SHong Zhang       break;
278b2b77a04SHong Zhang     }
279f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
280b2b77a04SHong Zhang     fftw->foutarray = y_array;
281b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
282b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
283b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
284b2b77a04SHong Zhang   } else { /* use existing plan */
285b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
286b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
287b2b77a04SHong Zhang     } else {
288b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
289b2b77a04SHong Zhang     }
290b2b77a04SHong Zhang   }
291b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
292f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
293b2b77a04SHong Zhang   PetscFunctionReturn(0);
294b2b77a04SHong Zhang }
295b2b77a04SHong Zhang 
296*0cf2e031SBarry Smith /*
297*0cf2e031SBarry Smith    MatMultTranspose_MPIFFTW performs parallel backward DFT
29897504071SAmlan Barua    Input parameter:
2993564f093SHong Zhang      A - the matrix
30097504071SAmlan Barua      x - the vector on which BDFT will be performed
30197504071SAmlan Barua 
30297504071SAmlan Barua    Output parameter:
30397504071SAmlan Barua      y - vector that stores result of BDFT
30497504071SAmlan Barua */
305b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
306b2b77a04SHong Zhang {
307b2b77a04SHong Zhang   PetscErrorCode ierr;
308b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
309b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
310f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
311f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
312c92418ccSAmlan Barua   PetscInt       ndim = fft->ndim,*dim = fft->dim;
313ce94432eSBarry Smith   MPI_Comm       comm;
314b2b77a04SHong Zhang 
315b2b77a04SHong Zhang   PetscFunctionBegin;
316ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
317f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
318b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
319b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
320b2b77a04SHong Zhang     switch (ndim) {
321b2b77a04SHong Zhang     case 1:
3223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
323b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
324ae0a50aaSHong Zhang #else
3254f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3263c3a9c75SAmlan Barua #endif
327b2b77a04SHong Zhang       break;
328b2b77a04SHong Zhang     case 2:
32997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
330b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3313c3a9c75SAmlan Barua #else
3323c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3333c3a9c75SAmlan Barua #endif
334b2b77a04SHong Zhang       break;
335b2b77a04SHong Zhang     case 3:
3363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
337b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3383c3a9c75SAmlan Barua #else
3393c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3403c3a9c75SAmlan Barua #endif
341b2b77a04SHong Zhang       break;
342b2b77a04SHong Zhang     default:
3433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
344c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3453c3a9c75SAmlan Barua #else
3463c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3473c3a9c75SAmlan Barua #endif
348b2b77a04SHong Zhang       break;
349b2b77a04SHong Zhang     }
350f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
351b2b77a04SHong Zhang     fftw->boutarray = y_array;
3522f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
353b2b77a04SHong Zhang   } else { /* use existing plan */
354b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
355b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
356b2b77a04SHong Zhang     } else {
3572f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
358b2b77a04SHong Zhang     }
359b2b77a04SHong Zhang   }
360b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
361f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
362b2b77a04SHong Zhang   PetscFunctionReturn(0);
363b2b77a04SHong Zhang }
364*0cf2e031SBarry Smith #endif
365b2b77a04SHong Zhang 
366b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
367b2b77a04SHong Zhang {
368b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
369b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
370b2b77a04SHong Zhang   PetscErrorCode ierr;
371b2b77a04SHong Zhang 
372b2b77a04SHong Zhang   PetscFunctionBegin;
373b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
374b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3758c1d5ab3SHong Zhang   if (fftw->iodims) {
3768c1d5ab3SHong Zhang     free(fftw->iodims);
3778c1d5ab3SHong Zhang   }
378bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
379bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
380*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3816ccf0b3eSHong Zhang   fftw_mpi_cleanup();
382*0cf2e031SBarry Smith #endif
383b2b77a04SHong Zhang   PetscFunctionReturn(0);
384b2b77a04SHong Zhang }
385b2b77a04SHong Zhang 
386*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
387c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
388b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
389b2b77a04SHong Zhang {
390b2b77a04SHong Zhang   PetscErrorCode ierr;
391b2b77a04SHong Zhang   PetscScalar    *array;
392b2b77a04SHong Zhang 
393b2b77a04SHong Zhang   PetscFunctionBegin;
394b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
3952f613bf5SBarry Smith   fftw_free((fftw_complex*)array);
396b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
397b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
398b2b77a04SHong Zhang   PetscFunctionReturn(0);
399b2b77a04SHong Zhang }
400*0cf2e031SBarry Smith #endif
401b2b77a04SHong Zhang 
402*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
4035b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
4045b113e21Ss-sajid-ali {
4055b113e21Ss-sajid-ali   PetscErrorCode ierr;
4065b113e21Ss-sajid-ali   Mat            A;
4075b113e21Ss-sajid-ali 
4085b113e21Ss-sajid-ali   PetscFunctionBegin;
4095b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
4105b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr);
4115b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4125b113e21Ss-sajid-ali }
4135b113e21Ss-sajid-ali 
4145b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
4155b113e21Ss-sajid-ali {
4165b113e21Ss-sajid-ali   PetscErrorCode ierr;
4175b113e21Ss-sajid-ali   Mat            A;
4185b113e21Ss-sajid-ali 
4195b113e21Ss-sajid-ali   PetscFunctionBegin;
4205b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
4215b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr);
4225b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4235b113e21Ss-sajid-ali }
4245b113e21Ss-sajid-ali 
4255b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
4265b113e21Ss-sajid-ali {
4275b113e21Ss-sajid-ali   PetscErrorCode ierr;
4285b113e21Ss-sajid-ali   Mat            A;
4295b113e21Ss-sajid-ali 
4305b113e21Ss-sajid-ali   PetscFunctionBegin;
4315b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
4325b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr);
4335b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4345b113e21Ss-sajid-ali }
435*0cf2e031SBarry Smith #endif
4365b113e21Ss-sajid-ali 
4374be45526SHong Zhang /*@
4382a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
4394f7415efSAmlan Barua      parallel layout determined by FFTW
4404f7415efSAmlan Barua 
4414f7415efSAmlan Barua    Collective on Mat
4424f7415efSAmlan Barua 
4434f7415efSAmlan Barua    Input Parameter:
4443564f093SHong Zhang .   A - the matrix
4454f7415efSAmlan Barua 
446d8d19677SJose E. Roman    Output Parameters:
447cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4486b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
449cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4504f7415efSAmlan Barua 
4514f7415efSAmlan Barua   Level: advanced
4523564f093SHong Zhang 
45397504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
45497504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
45597504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
45697504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
45797504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45897504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
459e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
460e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
461e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
462e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
463e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
464e0ec536eSAmlan Barua         each processor and returns that.
4654f7415efSAmlan Barua 
46675f45d78SBarry Smith .seealso: MatCreateFFT()
4674be45526SHong Zhang @*/
4682a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4694be45526SHong Zhang {
4704be45526SHong Zhang   PetscErrorCode ierr;
471b4c3921fSHong Zhang 
4724be45526SHong Zhang   PetscFunctionBegin;
473163d334eSBarry Smith   ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4744be45526SHong Zhang   PetscFunctionReturn(0);
4754be45526SHong Zhang }
4764be45526SHong Zhang 
4772a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4784f7415efSAmlan Barua {
4794f7415efSAmlan Barua   PetscErrorCode ierr;
4804f7415efSAmlan Barua   PetscMPIInt    size,rank;
481ce94432eSBarry Smith   MPI_Comm       comm;
4824f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4834f7415efSAmlan Barua 
4844f7415efSAmlan Barua   PetscFunctionBegin;
4854f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4864f7415efSAmlan Barua   PetscValidType(A,1);
487ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4884f7415efSAmlan Barua 
489ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
490ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
4914f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4924f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
493*0cf2e031SBarry Smith     if (fin)  {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fin);CHKERRQ(ierr);}
494*0cf2e031SBarry Smith     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fout);CHKERRQ(ierr);}
495*0cf2e031SBarry Smith     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,bout);CHKERRQ(ierr);}
4964f7415efSAmlan Barua #else
497*0cf2e031SBarry Smith     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fin);CHKERRQ(ierr);}
498*0cf2e031SBarry Smith     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fout);CHKERRQ(ierr);}
499*0cf2e031SBarry Smith     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,bout);CHKERRQ(ierr);}
5004f7415efSAmlan Barua #endif
501*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50297504071SAmlan Barua   } else { /* parallel cases */
503*0cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW*)fft->data;
504*0cf2e031SBarry Smith     PetscInt     ndim  = fft->ndim,*dim = fft->dim;
5054f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
5069496c9aeSAmlan Barua     ptrdiff_t    local_n1;
5079496c9aeSAmlan Barua     fftw_complex *data_fout;
5089496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
5099496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
5109496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
5119496c9aeSAmlan Barua #else
5124f7415efSAmlan Barua     double       *data_finr,*data_boutr;
5136ccf0b3eSHong Zhang     PetscInt     n1,N1;
5149496c9aeSAmlan Barua     ptrdiff_t    temp;
5159496c9aeSAmlan Barua #endif
5169496c9aeSAmlan Barua 
5174f7415efSAmlan Barua     switch (ndim) {
5184f7415efSAmlan Barua     case 1:
51957625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5204f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
52157625b84SAmlan Barua #else
52257625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
52357625b84SAmlan Barua       if (fin) {
52457625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
525*0cf2e031SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5265b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5275b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
52857625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
52957625b84SAmlan Barua       }
53057625b84SAmlan Barua       if (fout) {
53157625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
532*0cf2e031SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5335b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5345b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
53557625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
53657625b84SAmlan Barua       }
53757625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
53857625b84SAmlan Barua       if (bout) {
53957625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
540*0cf2e031SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5415b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5425b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54357625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
54457625b84SAmlan Barua       }
54557625b84SAmlan Barua       break;
54657625b84SAmlan Barua #endif
5474f7415efSAmlan Barua     case 2:
54897504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5494f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5504f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5514f7415efSAmlan Barua       if (fin) {
5524f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
553778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5545b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5555b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5564f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5574f7415efSAmlan Barua       }
55857625b84SAmlan Barua       if (fout) {
55957625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
560778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5615b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5625b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
56357625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56457625b84SAmlan Barua       }
5655b113e21Ss-sajid-ali       if (bout) {
5665b113e21Ss-sajid-ali         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
5675b113e21Ss-sajid-ali         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5685b113e21Ss-sajid-ali         ierr       = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5695b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5705b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5715b113e21Ss-sajid-ali       }
5724f7415efSAmlan Barua #else
5734f7415efSAmlan Barua       /* Get local size */
5744f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5754f7415efSAmlan Barua       if (fin) {
5764f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577*0cf2e031SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5785b113e21Ss-sajid-ali         ierr     = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5795b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5804f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5814f7415efSAmlan Barua       }
5824f7415efSAmlan Barua       if (fout) {
5834f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
584*0cf2e031SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5855b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5865b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5874f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5884f7415efSAmlan Barua       }
5894f7415efSAmlan Barua       if (bout) {
5904f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
591*0cf2e031SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5925b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5935b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5944f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5954f7415efSAmlan Barua       }
5964f7415efSAmlan Barua #endif
5974f7415efSAmlan Barua       break;
5984f7415efSAmlan Barua     case 3:
5994f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6004f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6014f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
6024f7415efSAmlan Barua       if (fin) {
6034f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
604778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6055b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6065b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6074f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6084f7415efSAmlan Barua       }
6095b113e21Ss-sajid-ali       if (fout) {
6105b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6115b113e21Ss-sajid-ali         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6125b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6135b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6145b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6155b113e21Ss-sajid-ali       }
6164f7415efSAmlan Barua       if (bout) {
6174f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
618778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
6195b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6205b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6214f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6224f7415efSAmlan Barua       }
6234f7415efSAmlan Barua #else
6240c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
6250c9b18e4SAmlan Barua       if (fin) {
6260c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
627*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6285b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6295b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6300c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6320c9b18e4SAmlan Barua       if (fout) {
6330c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
634*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6355b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6365b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6370c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6380c9b18e4SAmlan Barua       }
6390c9b18e4SAmlan Barua       if (bout) {
6400c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
641*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
6425b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6435b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6440c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6450c9b18e4SAmlan Barua       }
6464f7415efSAmlan Barua #endif
6474f7415efSAmlan Barua       break;
6484f7415efSAmlan Barua     default:
6494f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6504f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
6514f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
6524f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
653*0cf2e031SBarry Smith       N1          = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
6544f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
6554f7415efSAmlan Barua 
6564f7415efSAmlan Barua       if (fin) {
6574f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
658*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6595b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6605b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6614f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6624f7415efSAmlan Barua       }
6635b113e21Ss-sajid-ali       if (fout) {
6645b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
665*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6665b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6675b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6685b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6695b113e21Ss-sajid-ali       }
6704f7415efSAmlan Barua       if (bout) {
6714f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
672*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
6735b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6745b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6759496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6764f7415efSAmlan Barua       }
6774f7415efSAmlan Barua #else
6780c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6790c9b18e4SAmlan Barua       if (fin) {
6800c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
681*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6825b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6835b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6840c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6850c9b18e4SAmlan Barua       }
6860c9b18e4SAmlan Barua       if (fout) {
6870c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
688*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6895b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6905b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6910c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6920c9b18e4SAmlan Barua       }
6930c9b18e4SAmlan Barua       if (bout) {
6940c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
695*0cf2e031SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
6965b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6975b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6980c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6990c9b18e4SAmlan Barua       }
7004f7415efSAmlan Barua #endif
7014f7415efSAmlan Barua       break;
7024f7415efSAmlan Barua     }
703f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
704f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
705f9d91177SJunchao Zhang        memory leaks. We void these pointers here to avoid acident uses.
706f9d91177SJunchao Zhang     */
707f9d91177SJunchao Zhang     if (fin)  (*fin)->ops->replacearray = NULL;
708f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
709f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
710*0cf2e031SBarry Smith #endif
7114f7415efSAmlan Barua   }
7124f7415efSAmlan Barua   PetscFunctionReturn(0);
7134f7415efSAmlan Barua }
7144f7415efSAmlan Barua 
7153564f093SHong Zhang /*@
7163564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
71754efbe56SHong Zhang 
7183564f093SHong Zhang    Collective on Mat
7193564f093SHong Zhang 
7203564f093SHong Zhang    Input Parameters:
7213564f093SHong Zhang +  A - FFTW matrix
7223564f093SHong Zhang -  x - the PETSc vector
7233564f093SHong Zhang 
7243564f093SHong Zhang    Output Parameters:
7253564f093SHong Zhang .  y - the FFTW vector
7263564f093SHong Zhang 
727b2b77a04SHong Zhang   Options Database Keys:
7283564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
729b2b77a04SHong Zhang 
730b2b77a04SHong Zhang    Level: intermediate
7313564f093SHong Zhang 
73297504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73397504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
73497504071SAmlan Barua          zeros. This routine does that job by scattering operation.
73597504071SAmlan Barua 
7363564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
7373564f093SHong Zhang @*/
7383564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
7393564f093SHong Zhang {
7403564f093SHong Zhang   PetscErrorCode ierr;
741b2b77a04SHong Zhang 
7423564f093SHong Zhang   PetscFunctionBegin;
743163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7443564f093SHong Zhang   PetscFunctionReturn(0);
7453564f093SHong Zhang }
7463c3a9c75SAmlan Barua 
74774a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
7483c3a9c75SAmlan Barua {
7493c3a9c75SAmlan Barua   PetscErrorCode ierr;
750ce94432eSBarry Smith   MPI_Comm       comm;
7513c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
7529496c9aeSAmlan Barua   PetscInt       low;
7537a21806cSHong Zhang   PetscMPIInt    rank,size;
7547a21806cSHong Zhang   PetscInt       vsize,vsize1;
7553c3a9c75SAmlan Barua   VecScatter     vecscat;
756*0cf2e031SBarry Smith   IS             list1;
7573c3a9c75SAmlan Barua 
7583564f093SHong Zhang   PetscFunctionBegin;
759ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
760ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
761ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
7621e1ea65dSPierre Jolivet   ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr);
7633c3a9c75SAmlan Barua 
7643564f093SHong Zhang   if (size==1) {
7658ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7668ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
767*0cf2e031SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1);CHKERRQ(ierr);
7689448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7696971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7706971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7716971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
772b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
773*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7743564f093SHong Zhang   } else {
775*0cf2e031SBarry Smith     Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
776*0cf2e031SBarry Smith     PetscInt   ndim  = fft->ndim,*dim = fft->dim;
777*0cf2e031SBarry Smith     ptrdiff_t  local_n0,local_0_start;
778*0cf2e031SBarry Smith     ptrdiff_t  local_n1,local_1_start;
779*0cf2e031SBarry Smith     IS         list2;
780*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
781*0cf2e031SBarry Smith     PetscInt   i,j,k,partial_dim;
782*0cf2e031SBarry Smith     PetscInt   *indx1, *indx2, tempindx, tempindx1;
783*0cf2e031SBarry Smith     PetscInt   NM;
784*0cf2e031SBarry Smith     ptrdiff_t  temp;
785*0cf2e031SBarry Smith #endif
786*0cf2e031SBarry Smith 
7873c3a9c75SAmlan Barua     switch (ndim) {
7883c3a9c75SAmlan Barua     case 1:
78964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7907a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
79126fbe8dcSKarl Rupp 
7921e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);CHKERRQ(ierr);
7931e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0,low,1,&list2);CHKERRQ(ierr);
7949448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
79564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
79864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
79964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
80064657d84SAmlan Barua #else
801e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
80264657d84SAmlan Barua #endif
8033c3a9c75SAmlan Barua       break;
8043c3a9c75SAmlan Barua     case 2:
805bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8067a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
80726fbe8dcSKarl Rupp 
8081e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
8091e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
8109448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
811bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
812bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
813bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
814bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
815bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
816bd59e6c6SAmlan Barua #else
817c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
81826fbe8dcSKarl Rupp 
819854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
820854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
8213c3a9c75SAmlan Barua 
8223564f093SHong Zhang       if (dim[1]%2==0) {
8233c3a9c75SAmlan Barua         NM = dim[1]+2;
8243564f093SHong Zhang       } else {
8253c3a9c75SAmlan Barua         NM = dim[1]+1;
8263564f093SHong Zhang       }
8273c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
8283c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
8293c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
8303c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
83126fbe8dcSKarl Rupp 
8325b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
8333c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
8343c3a9c75SAmlan Barua         }
8353c3a9c75SAmlan Barua       }
8363c3a9c75SAmlan Barua 
8373c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8383c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8393c3a9c75SAmlan Barua 
8409448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
841f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
844b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
845b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
846b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
847b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
848bd59e6c6SAmlan Barua #endif
8499496c9aeSAmlan Barua       break;
8503c3a9c75SAmlan Barua 
8513c3a9c75SAmlan Barua     case 3:
852bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8537a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
85426fbe8dcSKarl Rupp 
8551e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
8561e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
8579448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
858bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
861bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
862bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
863bd59e6c6SAmlan Barua #else
864c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
865f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
8667a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
86726fbe8dcSKarl Rupp 
868854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
869854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
87051d1eed7SAmlan Barua 
871db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
872db4deed7SKarl Rupp       else             NM = dim[2]+1;
87351d1eed7SAmlan Barua 
87451d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
87551d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
87651d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
87751d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
87851d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
87926fbe8dcSKarl Rupp 
88051d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
88151d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
88251d1eed7SAmlan Barua           }
88351d1eed7SAmlan Barua         }
88451d1eed7SAmlan Barua       }
88551d1eed7SAmlan Barua 
88651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
88751d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8889448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
88951d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
89051d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
89151d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
892b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
893b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
894b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
895b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
896bd59e6c6SAmlan Barua #endif
8979496c9aeSAmlan Barua       break;
8983c3a9c75SAmlan Barua 
8993c3a9c75SAmlan Barua     default:
900bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9017a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
90226fbe8dcSKarl Rupp 
9031e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
9041e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
9059448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
906bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
909bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
910bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
911bd59e6c6SAmlan Barua #else
912c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
913f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
914e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
91526fbe8dcSKarl Rupp 
916e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
91726fbe8dcSKarl Rupp 
9187a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
91926fbe8dcSKarl Rupp 
920e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
921e5d7f247SAmlan Barua 
922e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
923e5d7f247SAmlan Barua 
924854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
925854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
926e5d7f247SAmlan Barua 
927db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
928db4deed7SKarl Rupp       else                  NM = 1;
929e5d7f247SAmlan Barua 
9306971673cSAmlan Barua       j = low;
9313564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
9326971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
9336971673cSAmlan Barua         indx2[i] = j;
93426fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
9356971673cSAmlan Barua         j++;
9366971673cSAmlan Barua       }
9376971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9386971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9399448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
9406971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9416971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9426971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
943b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
944b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
945b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
946b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
947bd59e6c6SAmlan Barua #endif
9489496c9aeSAmlan Barua       break;
9493c3a9c75SAmlan Barua     }
950*0cf2e031SBarry Smith #endif
951e81bb599SAmlan Barua   }
9523564f093SHong Zhang   PetscFunctionReturn(0);
9533c3a9c75SAmlan Barua }
9543c3a9c75SAmlan Barua 
9553564f093SHong Zhang /*@
9563564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
9573564f093SHong Zhang 
9583564f093SHong Zhang    Collective on Mat
9593564f093SHong Zhang 
9603564f093SHong Zhang     Input Parameters:
9613564f093SHong Zhang +   A - FFTW matrix
9623564f093SHong Zhang -   x - FFTW vector
9633564f093SHong Zhang 
9643564f093SHong Zhang    Output Parameters:
9653564f093SHong Zhang .  y - PETSc vector
9663564f093SHong Zhang 
9673564f093SHong Zhang    Level: intermediate
9683564f093SHong Zhang 
9693564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9703564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9713564f093SHong Zhang 
9723564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9733564f093SHong Zhang @*/
97474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9753c3a9c75SAmlan Barua {
9763c3a9c75SAmlan Barua   PetscErrorCode ierr;
9775fd66863SKarl Rupp 
9783c3a9c75SAmlan Barua   PetscFunctionBegin;
979163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9803c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9813c3a9c75SAmlan Barua }
98254efbe56SHong Zhang 
98374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9845b3e41ffSAmlan Barua {
9855b3e41ffSAmlan Barua   PetscErrorCode ierr;
986ce94432eSBarry Smith   MPI_Comm       comm;
9875b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9889496c9aeSAmlan Barua   PetscInt       low;
9897a21806cSHong Zhang   PetscMPIInt    rank,size;
9905b3e41ffSAmlan Barua   VecScatter     vecscat;
991*0cf2e031SBarry Smith   IS             list1;
9929496c9aeSAmlan Barua 
9933564f093SHong Zhang   PetscFunctionBegin;
994ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
995ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
996ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
9970298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9985b3e41ffSAmlan Barua 
999e81bb599SAmlan Barua   if (size==1) {
1000*0cf2e031SBarry Smith     ierr = ISCreateStride(comm,fft->N,0,1,&list1);CHKERRQ(ierr);
10019448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
10026971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10036971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10046971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1005b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1006e81bb599SAmlan Barua 
1007*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10083564f093SHong Zhang   } else {
1009*0cf2e031SBarry Smith     Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1010*0cf2e031SBarry Smith     PetscInt       ndim  = fft->ndim,*dim = fft->dim;
1011*0cf2e031SBarry Smith     ptrdiff_t      local_n0,local_0_start;
1012*0cf2e031SBarry Smith     ptrdiff_t      local_n1,local_1_start;
1013*0cf2e031SBarry Smith     IS             list2;
1014*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
1015*0cf2e031SBarry Smith     PetscInt       i,j,k,partial_dim;
1016*0cf2e031SBarry Smith     PetscInt       *indx1, *indx2, tempindx, tempindx1;
1017*0cf2e031SBarry Smith     PetscInt       NM;
1018*0cf2e031SBarry Smith     ptrdiff_t      temp;
1019*0cf2e031SBarry Smith #endif
1020e81bb599SAmlan Barua     switch (ndim) {
1021e81bb599SAmlan Barua     case 1:
102264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10237a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
102426fbe8dcSKarl Rupp 
10251e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);CHKERRQ(ierr);
10261e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n1,low,1,&list2);CHKERRQ(ierr);
10279448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
102864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
102964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
103164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
103264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
103364657d84SAmlan Barua #else
10346a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
103564657d84SAmlan Barua #endif
10365b3e41ffSAmlan Barua       break;
10375b3e41ffSAmlan Barua     case 2:
1038bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10397a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
104026fbe8dcSKarl Rupp 
10411e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
10421e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
10439448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1044bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1045bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1047bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1048bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1049bd59e6c6SAmlan Barua #else
10507a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
105126fbe8dcSKarl Rupp 
1052854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1053854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
10545b3e41ffSAmlan Barua 
1055db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
1056db4deed7SKarl Rupp       else             NM = dim[1]+1;
10575b3e41ffSAmlan Barua 
10585b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
10595b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
10605b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
10615b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
106226fbe8dcSKarl Rupp 
10635b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10645b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10655b3e41ffSAmlan Barua         }
10665b3e41ffSAmlan Barua       }
10675b3e41ffSAmlan Barua 
10685b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10695b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10705b3e41ffSAmlan Barua 
10719448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10725b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10735b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10745b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1075b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1076b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1077b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1078b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1079bd59e6c6SAmlan Barua #endif
10809496c9aeSAmlan Barua       break;
10815b3e41ffSAmlan Barua     case 3:
1082bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10837a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
108426fbe8dcSKarl Rupp 
10851e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
10861e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
10879448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1088bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1091bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1092bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1093bd59e6c6SAmlan Barua #else
10947a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
109526fbe8dcSKarl Rupp 
1096854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1097854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
109851d1eed7SAmlan Barua 
1099db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1100db4deed7SKarl Rupp       else             NM = dim[2]+1;
110151d1eed7SAmlan Barua 
110251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
110351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
110451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
110551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
110651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
110726fbe8dcSKarl Rupp 
110851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
110951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
111051d1eed7SAmlan Barua           }
111151d1eed7SAmlan Barua         }
111251d1eed7SAmlan Barua       }
111351d1eed7SAmlan Barua 
111451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
111551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
111651d1eed7SAmlan Barua 
11179448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
111851d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
111951d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
112051d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1121b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1122b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1123b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1124b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1125bd59e6c6SAmlan Barua #endif
11269496c9aeSAmlan Barua       break;
11275b3e41ffSAmlan Barua     default:
1128bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11297a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
113026fbe8dcSKarl Rupp 
11311e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
11321e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
11339448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1134bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1137bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1138bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1139bd59e6c6SAmlan Barua #else
1140ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
114126fbe8dcSKarl Rupp 
1142ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
114326fbe8dcSKarl Rupp 
1144c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
114526fbe8dcSKarl Rupp 
1146ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1147ba6e06d0SAmlan Barua 
1148ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1149ba6e06d0SAmlan Barua 
1150854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1151854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1152ba6e06d0SAmlan Barua 
1153db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1154db4deed7SKarl Rupp       else                  NM = 1;
1155ba6e06d0SAmlan Barua 
1156ba6e06d0SAmlan Barua       j = low;
11573564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1158ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1159ba6e06d0SAmlan Barua         indx2[i] = j;
11603564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1161ba6e06d0SAmlan Barua         j++;
1162ba6e06d0SAmlan Barua       }
1163ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1164ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1165ba6e06d0SAmlan Barua 
11669448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1167ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1170b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1171b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1172b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1173b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1174bd59e6c6SAmlan Barua #endif
11759496c9aeSAmlan Barua       break;
11765b3e41ffSAmlan Barua     }
1177*0cf2e031SBarry Smith #endif
1178e81bb599SAmlan Barua   }
11793564f093SHong Zhang   PetscFunctionReturn(0);
11805b3e41ffSAmlan Barua }
11815b3e41ffSAmlan Barua 
11823c3a9c75SAmlan Barua /*
11833564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11843564f093SHong Zhang 
11853c3a9c75SAmlan Barua   Options Database Keys:
11863c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11873c3a9c75SAmlan Barua 
11883c3a9c75SAmlan Barua    Level: intermediate
11893c3a9c75SAmlan Barua 
11903c3a9c75SAmlan Barua */
11918cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1192b2b77a04SHong Zhang {
1193b2b77a04SHong Zhang   PetscErrorCode ierr;
1194ce94432eSBarry Smith   MPI_Comm       comm;
1195b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
1196b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1197*0cf2e031SBarry Smith   PetscInt       ndim = fft->ndim,*dim = fft->dim;
11985274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11995274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1200b2b77a04SHong Zhang   PetscBool      flg;
12014f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
120211902ff2SHong Zhang   PetscMPIInt    size,rank;
12039496c9aeSAmlan Barua   ptrdiff_t      *pdim;
12049496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1205*0cf2e031SBarry Smith   PetscInt       tot_dim;
12069496c9aeSAmlan Barua #endif
12079496c9aeSAmlan Barua 
1208b2b77a04SHong Zhang   PetscFunctionBegin;
1209ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1210ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
1211ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
1212c92418ccSAmlan Barua 
1213*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12141878d478SAmlan Barua   fftw_mpi_init();
1215*0cf2e031SBarry Smith #endif
121611902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
121711902ff2SHong Zhang   pdim[0] = dim[0];
12188ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12198ca4c034SAmlan Barua   tot_dim = 2*dim[0];
12208ca4c034SAmlan Barua #endif
12213564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
12226882372aSHong Zhang     partial_dim *= dim[ctr];
122311902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
12248ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1225db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1226db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
12278ca4c034SAmlan Barua #endif
12286882372aSHong Zhang   }
12293c3a9c75SAmlan Barua 
1230b2b77a04SHong Zhang   if (size == 1) {
1231e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1232*0cf2e031SBarry Smith     ierr = MatSetSizes(A,fft->N,fft->N,fft->N,fft->N);CHKERRQ(ierr);
1233*0cf2e031SBarry Smith     fft->n = fft->N;
1234e81bb599SAmlan Barua #else
1235e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1236*0cf2e031SBarry Smith     fft->n = tot_dim;
1237*0cf2e031SBarry Smith #endif
1238*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1239*0cf2e031SBarry Smith   } else {
1240*0cf2e031SBarry Smith     ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
1241*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
1242*0cf2e031SBarry Smith     ptrdiff_t temp;
1243*0cf2e031SBarry Smith     PetscInt  N1;
1244e81bb599SAmlan Barua #endif
1245e81bb599SAmlan Barua 
1246b2b77a04SHong Zhang     switch (ndim) {
1247b2b77a04SHong Zhang     case 1:
12483c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12493c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1250e5d7f247SAmlan Barua #else
12517a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1252*0cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
1253*0cf2e031SBarry Smith       ierr   = MatSetSizes(A,local_n1,fft->n,fft->N,fft->N);CHKERRQ(ierr);
1254e5d7f247SAmlan Barua #endif
1255b2b77a04SHong Zhang       break;
1256b2b77a04SHong Zhang     case 2:
12575b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12587a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1259*0cf2e031SBarry Smith       fft->n    = (PetscInt)local_n0*dim[1];
1260*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr);
12615b3e41ffSAmlan Barua #else
1262c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
126326fbe8dcSKarl Rupp 
1264*0cf2e031SBarry Smith       fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1265*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));CHKERRQ(ierr);
12665b3e41ffSAmlan Barua #endif
1267b2b77a04SHong Zhang       break;
1268b2b77a04SHong Zhang     case 3:
126951d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12707a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
127126fbe8dcSKarl Rupp 
1272*0cf2e031SBarry Smith       fft->n = (PetscInt)local_n0*dim[1]*dim[2];
1273*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr);
127451d1eed7SAmlan Barua #else
1275c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
127626fbe8dcSKarl Rupp 
1277*0cf2e031SBarry Smith       fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1278*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));CHKERRQ(ierr);
127951d1eed7SAmlan Barua #endif
1280b2b77a04SHong Zhang       break;
1281b2b77a04SHong Zhang     default:
1282b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12837a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
128426fbe8dcSKarl Rupp 
1285*0cf2e031SBarry Smith       fft->n = (PetscInt)local_n0*partial_dim;
1286*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr);
1287b3a17365SAmlan Barua #else
1288b3a17365SAmlan Barua       temp = pdim[ndim-1];
128926fbe8dcSKarl Rupp 
1290b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
129126fbe8dcSKarl Rupp 
1292c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
129326fbe8dcSKarl Rupp 
1294*0cf2e031SBarry Smith       fft->n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1295*0cf2e031SBarry Smith       N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
129626fbe8dcSKarl Rupp 
1297b3a17365SAmlan Barua       pdim[ndim-1] = temp;
129826fbe8dcSKarl Rupp 
1299*0cf2e031SBarry Smith       ierr = MatSetSizes(A,fft->n,fft->n,N1,N1);CHKERRQ(ierr);
1300b3a17365SAmlan Barua #endif
1301b2b77a04SHong Zhang       break;
1302b2b77a04SHong Zhang     }
1303*0cf2e031SBarry Smith #endif
1304b2b77a04SHong Zhang   }
13052277172eSMark Adams   free(pdim);
1306b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1307b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1308b2b77a04SHong Zhang   fft->data = (void*)fftw;
1309b2b77a04SHong Zhang 
13100c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1311e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
131226fbe8dcSKarl Rupp 
13135e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
13148c1d5ab3SHong Zhang   if (size == 1) {
1315a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1316a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1317a1b6d50cSHong Zhang #else
13188c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1319a1b6d50cSHong Zhang #endif
13208c1d5ab3SHong Zhang   }
132126fbe8dcSKarl Rupp 
1322b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1323c92418ccSAmlan Barua 
1324f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1325f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1326b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1327b2b77a04SHong Zhang 
1328b2b77a04SHong Zhang   if (size == 1) {
1329b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1330b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1331*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1332b2b77a04SHong Zhang   } else {
1333b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1334b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1335*0cf2e031SBarry Smith #endif
1336b2b77a04SHong Zhang   }
1337b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1338b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13394a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
134026fbe8dcSKarl Rupp 
13412a7a6963SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1342bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1343bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1344b2b77a04SHong Zhang 
1345b2b77a04SHong Zhang   /* get runtime options */
1346ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
13475274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
13485274ed1bSHong Zhang   if (flg) {
13495274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
13505274ed1bSHong Zhang   }
13514a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1352b2b77a04SHong Zhang   PetscFunctionReturn(0);
1353b2b77a04SHong Zhang }
13543c3a9c75SAmlan Barua 
1355