xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 163d334ea1df412efbe58c2e8fce360a2afeaf87)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt     partial_dim;
20b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32b2b77a04SHong Zhang 
3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3497504071SAmlan Barua    Input parameter:
353564f093SHong Zhang      A - the matrix
3697504071SAmlan Barua      x - the vector on which FDFT will be performed
3797504071SAmlan Barua 
3897504071SAmlan Barua    Output parameter:
3997504071SAmlan Barua      y - vector that stores result of FDFT
4097504071SAmlan Barua */
41b2b77a04SHong Zhang #undef __FUNCT__
42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44b2b77a04SHong Zhang {
45b2b77a04SHong Zhang   PetscErrorCode ierr;
46b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
49f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
51a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
52a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
53a1b6d50cSHong Zhang #else
548c1d5ab3SHong Zhang   fftw_iodim     *iodims;
55a1b6d50cSHong Zhang #endif
561acd80e4SHong Zhang   PetscInt       i;
571acd80e4SHong Zhang #endif
581acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
59b2b77a04SHong Zhang 
60b2b77a04SHong Zhang   PetscFunctionBegin;
61f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
62b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
63b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
64b2b77a04SHong Zhang     switch (ndim) {
65b2b77a04SHong Zhang     case 1:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     case 2:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     case 3:
8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
81b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8258a912c5SAmlan Barua #else
8358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8458a912c5SAmlan Barua #endif
85b2b77a04SHong Zhang       break;
86b2b77a04SHong Zhang     default:
8758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
88a1b6d50cSHong Zhang       iodims = fftw->iodims;
89a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
908c1d5ab3SHong Zhang       if (ndim) {
91a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
928c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
938c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
94a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
958c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
968c1d5ab3SHong Zhang         }
978c1d5ab3SHong Zhang       }
98a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
99a1b6d50cSHong Zhang #else
100a1b6d50cSHong Zhang       if (ndim) {
101a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
102a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
103a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
104a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
105a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
106a1b6d50cSHong Zhang         }
107a1b6d50cSHong Zhang       }
108a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
109a1b6d50cSHong Zhang #endif
110a1b6d50cSHong Zhang 
11158a912c5SAmlan Barua #else
112a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11358a912c5SAmlan Barua #endif
114b2b77a04SHong Zhang       break;
115b2b77a04SHong Zhang     }
116f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
117b2b77a04SHong Zhang     fftw->foutarray = y_array;
118b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
119b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
120b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
121b2b77a04SHong Zhang   } else { /* use existing plan */
122b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1237e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
124b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1257e4bc134SDominic Meiser #else
1267e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1277e4bc134SDominic Meiser #endif
128b2b77a04SHong Zhang     } else {
129b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
130b2b77a04SHong Zhang     }
131b2b77a04SHong Zhang   }
132b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
133f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
134b2b77a04SHong Zhang   PetscFunctionReturn(0);
135b2b77a04SHong Zhang }
136b2b77a04SHong Zhang 
13797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13897504071SAmlan Barua    Input parameter:
1393564f093SHong Zhang      A - the matrix
14097504071SAmlan Barua      x - the vector on which BDFT will be performed
14197504071SAmlan Barua 
14297504071SAmlan Barua    Output parameter:
14397504071SAmlan Barua      y - vector that stores result of BDFT
14497504071SAmlan Barua */
14597504071SAmlan Barua 
146b2b77a04SHong Zhang #undef __FUNCT__
147b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
148b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
149b2b77a04SHong Zhang {
150b2b77a04SHong Zhang   PetscErrorCode ierr;
151b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
152b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
153f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
154f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
155b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
157a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
158a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
159a1b6d50cSHong Zhang #else
160a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
161a1b6d50cSHong Zhang #endif
1621acd80e4SHong Zhang #endif
163b2b77a04SHong Zhang 
164b2b77a04SHong Zhang   PetscFunctionBegin;
165f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
166b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
167b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
168b2b77a04SHong Zhang     switch (ndim) {
169b2b77a04SHong Zhang     case 1:
17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17258a912c5SAmlan Barua #else
173e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17458a912c5SAmlan Barua #endif
175b2b77a04SHong Zhang       break;
176b2b77a04SHong Zhang     case 2:
17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17958a912c5SAmlan Barua #else
180e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18158a912c5SAmlan Barua #endif
182b2b77a04SHong Zhang       break;
183b2b77a04SHong Zhang     case 3:
18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
185b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18658a912c5SAmlan Barua #else
187e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18858a912c5SAmlan Barua #endif
189b2b77a04SHong Zhang       break;
190b2b77a04SHong Zhang     default:
19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
193a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
194a1b6d50cSHong Zhang #else
1958c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
196a1b6d50cSHong Zhang #endif
19758a912c5SAmlan Barua #else
198a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19958a912c5SAmlan Barua #endif
200b2b77a04SHong Zhang       break;
201b2b77a04SHong Zhang     }
202f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
203b2b77a04SHong Zhang     fftw->boutarray = y_array;
204b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
205b2b77a04SHong Zhang   } else { /* use existing plan */
206b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2077e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
208b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2097e4bc134SDominic Meiser #else
2107e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2117e4bc134SDominic Meiser #endif
212b2b77a04SHong Zhang     } else {
213b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
214b2b77a04SHong Zhang     }
215b2b77a04SHong Zhang   }
216b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
217f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
218b2b77a04SHong Zhang   PetscFunctionReturn(0);
219b2b77a04SHong Zhang }
220b2b77a04SHong Zhang 
22197504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
22297504071SAmlan Barua    Input parameter:
2233564f093SHong Zhang      A - the matrix
22497504071SAmlan Barua      x - the vector on which FDFT will be performed
22597504071SAmlan Barua 
22697504071SAmlan Barua    Output parameter:
22797504071SAmlan Barua    y   - vector that stores result of FDFT
22897504071SAmlan Barua */
229b2b77a04SHong Zhang #undef __FUNCT__
230b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
231b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
232b2b77a04SHong Zhang {
233b2b77a04SHong Zhang   PetscErrorCode ierr;
234b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
235b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
236f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
237f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
238c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
239ce94432eSBarry Smith   MPI_Comm       comm;
240b2b77a04SHong Zhang 
241b2b77a04SHong Zhang   PetscFunctionBegin;
242ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
243f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
244b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
245b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
246b2b77a04SHong Zhang     switch (ndim) {
247b2b77a04SHong Zhang     case 1:
2483c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
249b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
250ae0a50aaSHong Zhang #else
2514f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2523c3a9c75SAmlan Barua #endif
253b2b77a04SHong Zhang       break;
254b2b77a04SHong Zhang     case 2:
25597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2573c3a9c75SAmlan Barua #else
2583c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2593c3a9c75SAmlan Barua #endif
260b2b77a04SHong Zhang       break;
261b2b77a04SHong Zhang     case 3:
2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
263b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2643c3a9c75SAmlan Barua #else
26551d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2663c3a9c75SAmlan Barua #endif
267b2b77a04SHong Zhang       break;
268b2b77a04SHong Zhang     default:
2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
270c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2713c3a9c75SAmlan Barua #else
2723c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     }
276f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
277b2b77a04SHong Zhang     fftw->foutarray = y_array;
278b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
279b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
281b2b77a04SHong Zhang   } else { /* use existing plan */
282b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
284b2b77a04SHong Zhang     } else {
285b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
286b2b77a04SHong Zhang     }
287b2b77a04SHong Zhang   }
288b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
289f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
290b2b77a04SHong Zhang   PetscFunctionReturn(0);
291b2b77a04SHong Zhang }
292b2b77a04SHong Zhang 
29397504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
29497504071SAmlan Barua    Input parameter:
2953564f093SHong Zhang      A - the matrix
29697504071SAmlan Barua      x - the vector on which BDFT will be performed
29797504071SAmlan Barua 
29897504071SAmlan Barua    Output parameter:
29997504071SAmlan Barua      y - vector that stores result of BDFT
30097504071SAmlan Barua */
301b2b77a04SHong Zhang #undef __FUNCT__
302b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
303b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
304b2b77a04SHong Zhang {
305b2b77a04SHong Zhang   PetscErrorCode ierr;
306b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
307b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
308f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
309f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
310c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
311ce94432eSBarry Smith   MPI_Comm       comm;
312b2b77a04SHong Zhang 
313b2b77a04SHong Zhang   PetscFunctionBegin;
314ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
315f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
316b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
317b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
318b2b77a04SHong Zhang     switch (ndim) {
319b2b77a04SHong Zhang     case 1:
3203c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
321b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
322ae0a50aaSHong Zhang #else
3234f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3243c3a9c75SAmlan Barua #endif
325b2b77a04SHong Zhang       break;
326b2b77a04SHong Zhang     case 2:
32797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
328b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3293c3a9c75SAmlan Barua #else
3303c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3313c3a9c75SAmlan Barua #endif
332b2b77a04SHong Zhang       break;
333b2b77a04SHong Zhang     case 3:
3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
335b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3363c3a9c75SAmlan Barua #else
3373c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3383c3a9c75SAmlan Barua #endif
339b2b77a04SHong Zhang       break;
340b2b77a04SHong Zhang     default:
3413c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
342c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3433c3a9c75SAmlan Barua #else
3443c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3453c3a9c75SAmlan Barua #endif
346b2b77a04SHong Zhang       break;
347b2b77a04SHong Zhang     }
348f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
349b2b77a04SHong Zhang     fftw->boutarray = y_array;
350b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
351b2b77a04SHong Zhang   } else { /* use existing plan */
352b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
354b2b77a04SHong Zhang     } else {
355b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
356b2b77a04SHong Zhang     }
357b2b77a04SHong Zhang   }
358b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
359f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
360b2b77a04SHong Zhang   PetscFunctionReturn(0);
361b2b77a04SHong Zhang }
362b2b77a04SHong Zhang 
363b2b77a04SHong Zhang #undef __FUNCT__
364b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
365b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
366b2b77a04SHong Zhang {
367b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
368b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
369b2b77a04SHong Zhang   PetscErrorCode ierr;
370b2b77a04SHong Zhang 
371b2b77a04SHong Zhang   PetscFunctionBegin;
372b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
373b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3748c1d5ab3SHong Zhang   if (fftw->iodims) {
3758c1d5ab3SHong Zhang     free(fftw->iodims);
3768c1d5ab3SHong Zhang   }
377bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
378bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3796ccf0b3eSHong Zhang   fftw_mpi_cleanup();
380b2b77a04SHong Zhang   PetscFunctionReturn(0);
381b2b77a04SHong Zhang }
382b2b77a04SHong Zhang 
383c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
384b2b77a04SHong Zhang #undef __FUNCT__
385b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
386b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
387b2b77a04SHong Zhang {
388b2b77a04SHong Zhang   PetscErrorCode ierr;
389b2b77a04SHong Zhang   PetscScalar    *array;
390b2b77a04SHong Zhang 
391b2b77a04SHong Zhang   PetscFunctionBegin;
392b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
393b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
394b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
395b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
396b2b77a04SHong Zhang   PetscFunctionReturn(0);
397b2b77a04SHong Zhang }
398b2b77a04SHong Zhang 
3994f7415efSAmlan Barua #undef __FUNCT__
4002a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW"
4014be45526SHong Zhang /*@
4022a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
4034f7415efSAmlan Barua      parallel layout determined by FFTW
4044f7415efSAmlan Barua 
4054f7415efSAmlan Barua    Collective on Mat
4064f7415efSAmlan Barua 
4074f7415efSAmlan Barua    Input Parameter:
4083564f093SHong Zhang .   A - the matrix
4094f7415efSAmlan Barua 
4104f7415efSAmlan Barua    Output Parameter:
411cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
412cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
413cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4144f7415efSAmlan Barua 
4154f7415efSAmlan Barua   Level: advanced
4163564f093SHong Zhang 
41797504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
41897504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
41997504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
42097504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
42197504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
42297504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
423e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
424e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
425e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
426e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
427e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
428e0ec536eSAmlan Barua         each processor and returns that.
4294f7415efSAmlan Barua 
4304f7415efSAmlan Barua .seealso: MatCreateFFTW()
4314be45526SHong Zhang @*/
4322a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4334be45526SHong Zhang {
4344be45526SHong Zhang   PetscErrorCode ierr;
435b4c3921fSHong Zhang 
4364be45526SHong Zhang   PetscFunctionBegin;
437*163d334eSBarry Smith   ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4384be45526SHong Zhang   PetscFunctionReturn(0);
4394be45526SHong Zhang }
4404be45526SHong Zhang 
4414be45526SHong Zhang #undef __FUNCT__
4422a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW_FFTW"
4432a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4444f7415efSAmlan Barua {
4454f7415efSAmlan Barua   PetscErrorCode ierr;
4464f7415efSAmlan Barua   PetscMPIInt    size,rank;
447ce94432eSBarry Smith   MPI_Comm       comm;
4484f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4494f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4509496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4514f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4524f7415efSAmlan Barua 
4534f7415efSAmlan Barua   PetscFunctionBegin;
4544f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4554f7415efSAmlan Barua   PetscValidType(A,1);
456ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4574f7415efSAmlan Barua 
4584f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4594f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4604f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4614f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4624f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4634f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4644f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4654f7415efSAmlan Barua #else
4668ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4678ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4688ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4694f7415efSAmlan Barua #endif
47097504071SAmlan Barua   } else { /* parallel cases */
4714f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4729496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4739496c9aeSAmlan Barua     fftw_complex *data_fout;
4749496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4759496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4769496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4779496c9aeSAmlan Barua #else
4784f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4796ccf0b3eSHong Zhang     PetscInt  n1,N1;
4809496c9aeSAmlan Barua     ptrdiff_t temp;
4819496c9aeSAmlan Barua #endif
4829496c9aeSAmlan Barua 
4834f7415efSAmlan Barua     switch (ndim) {
4844f7415efSAmlan Barua     case 1:
48557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4864f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
48757625b84SAmlan Barua #else
48857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
48957625b84SAmlan Barua       if (fin) {
49057625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
491778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
49226fbe8dcSKarl Rupp 
49357625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
49457625b84SAmlan Barua       }
49557625b84SAmlan Barua       if (fout) {
49657625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
49826fbe8dcSKarl Rupp 
49957625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
50057625b84SAmlan Barua       }
50157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
50257625b84SAmlan Barua       if (bout) {
50357625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
50526fbe8dcSKarl Rupp 
50657625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
50757625b84SAmlan Barua       }
50857625b84SAmlan Barua       break;
50957625b84SAmlan Barua #endif
5104f7415efSAmlan Barua     case 2:
51197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5124f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5134f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5144f7415efSAmlan Barua       if (fin) {
5154f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
516778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
51726fbe8dcSKarl Rupp 
5184f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5194f7415efSAmlan Barua       }
5204f7415efSAmlan Barua       if (bout) {
5214f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
522778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
52326fbe8dcSKarl Rupp 
5244f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5254f7415efSAmlan Barua       }
52657625b84SAmlan Barua       if (fout) {
52757625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52926fbe8dcSKarl Rupp 
53057625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
53157625b84SAmlan Barua       }
5324f7415efSAmlan Barua #else
5334f7415efSAmlan Barua       /* Get local size */
5344f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5354f7415efSAmlan Barua       if (fin) {
5364f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
53826fbe8dcSKarl Rupp 
5394f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5404f7415efSAmlan Barua       }
5414f7415efSAmlan Barua       if (fout) {
5424f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
543778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
54426fbe8dcSKarl Rupp 
5454f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5464f7415efSAmlan Barua       }
5474f7415efSAmlan Barua       if (bout) {
5484f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
549778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
55026fbe8dcSKarl Rupp 
5514f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5524f7415efSAmlan Barua       }
5534f7415efSAmlan Barua #endif
5544f7415efSAmlan Barua       break;
5554f7415efSAmlan Barua     case 3:
5564f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5574f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5584f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5594f7415efSAmlan Barua       if (fin) {
5604f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
561778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
56226fbe8dcSKarl Rupp 
5634f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5644f7415efSAmlan Barua       }
5654f7415efSAmlan Barua       if (bout) {
5664f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
567778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
56826fbe8dcSKarl Rupp 
5694f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5704f7415efSAmlan Barua       }
5713564f093SHong Zhang 
57257625b84SAmlan Barua       if (fout) {
57357625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
574778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57526fbe8dcSKarl Rupp 
57657625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57757625b84SAmlan Barua       }
5784f7415efSAmlan Barua #else
5790c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5800c9b18e4SAmlan Barua       if (fin) {
5810c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
582778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
58326fbe8dcSKarl Rupp 
5840c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5850c9b18e4SAmlan Barua       }
5860c9b18e4SAmlan Barua       if (fout) {
5870c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
588778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
58926fbe8dcSKarl Rupp 
5900c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5910c9b18e4SAmlan Barua       }
5920c9b18e4SAmlan Barua       if (bout) {
5930c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
594778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
59526fbe8dcSKarl Rupp 
5960c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5970c9b18e4SAmlan Barua       }
5984f7415efSAmlan Barua #endif
5994f7415efSAmlan Barua       break;
6004f7415efSAmlan Barua     default:
6014f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6024f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
60326fbe8dcSKarl Rupp 
6044f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
60526fbe8dcSKarl Rupp 
6064f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6074f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
60826fbe8dcSKarl Rupp 
6094f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
6104f7415efSAmlan Barua 
6114f7415efSAmlan Barua       if (fin) {
6124f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
613778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
61426fbe8dcSKarl Rupp 
6154f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6164f7415efSAmlan Barua       }
6174f7415efSAmlan Barua       if (bout) {
6184f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
619778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
62026fbe8dcSKarl Rupp 
6219496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6224f7415efSAmlan Barua       }
6233564f093SHong Zhang 
62457625b84SAmlan Barua       if (fout) {
62557625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
626778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
62726fbe8dcSKarl Rupp 
62857625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
62957625b84SAmlan Barua       }
6304f7415efSAmlan Barua #else
6310c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6320c9b18e4SAmlan Barua       if (fin) {
6330c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
634778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
63526fbe8dcSKarl Rupp 
6360c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6370c9b18e4SAmlan Barua       }
6380c9b18e4SAmlan Barua       if (fout) {
6390c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
640778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
64126fbe8dcSKarl Rupp 
6420c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6430c9b18e4SAmlan Barua       }
6440c9b18e4SAmlan Barua       if (bout) {
6450c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
646778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
64726fbe8dcSKarl Rupp 
6480c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6490c9b18e4SAmlan Barua       }
6504f7415efSAmlan Barua #endif
6514f7415efSAmlan Barua       break;
6524f7415efSAmlan Barua     }
6534f7415efSAmlan Barua   }
6544f7415efSAmlan Barua   PetscFunctionReturn(0);
6554f7415efSAmlan Barua }
6564f7415efSAmlan Barua 
657c92418ccSAmlan Barua #undef __FUNCT__
65874a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6593564f093SHong Zhang /*@
6603564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
66154efbe56SHong Zhang 
6623564f093SHong Zhang    Collective on Mat
6633564f093SHong Zhang 
6643564f093SHong Zhang    Input Parameters:
6653564f093SHong Zhang +  A - FFTW matrix
6663564f093SHong Zhang -  x - the PETSc vector
6673564f093SHong Zhang 
6683564f093SHong Zhang    Output Parameters:
6693564f093SHong Zhang .  y - the FFTW vector
6703564f093SHong Zhang 
671b2b77a04SHong Zhang   Options Database Keys:
6723564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
673b2b77a04SHong Zhang 
674b2b77a04SHong Zhang    Level: intermediate
6753564f093SHong Zhang 
67697504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
67797504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
67897504071SAmlan Barua          zeros. This routine does that job by scattering operation.
67997504071SAmlan Barua 
6803564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6813564f093SHong Zhang @*/
6823564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6833564f093SHong Zhang {
6843564f093SHong Zhang   PetscErrorCode ierr;
685b2b77a04SHong Zhang 
6863564f093SHong Zhang   PetscFunctionBegin;
687*163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6883564f093SHong Zhang   PetscFunctionReturn(0);
6893564f093SHong Zhang }
6903c3a9c75SAmlan Barua 
6913c3a9c75SAmlan Barua #undef __FUNCT__
6921986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
69374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6943c3a9c75SAmlan Barua {
6953c3a9c75SAmlan Barua   PetscErrorCode ierr;
696ce94432eSBarry Smith   MPI_Comm       comm;
6973c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6983c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6999496c9aeSAmlan Barua   PetscInt       N     =fft->N;
700b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
7019496c9aeSAmlan Barua   PetscInt       low;
7027a21806cSHong Zhang   PetscMPIInt    rank,size;
7037a21806cSHong Zhang   PetscInt       vsize,vsize1;
7047a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
7059496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7063c3a9c75SAmlan Barua   VecScatter     vecscat;
7073c3a9c75SAmlan Barua   IS             list1,list2;
7089496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7099496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
7109496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
711a31b9140SHong Zhang   PetscInt       NM;
7129496c9aeSAmlan Barua   ptrdiff_t      temp;
7139496c9aeSAmlan Barua #endif
7143c3a9c75SAmlan Barua 
7153564f093SHong Zhang   PetscFunctionBegin;
716ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
717f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
718f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
7190298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
7203c3a9c75SAmlan Barua 
7213564f093SHong Zhang   if (size==1) {
7228ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7238ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7249496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7256971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7266971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7276971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7286971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
729b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7303564f093SHong Zhang   } else {
7313c3a9c75SAmlan Barua     switch (ndim) {
7323c3a9c75SAmlan Barua     case 1:
73364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7347a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
73526fbe8dcSKarl Rupp 
7364f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7374f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
73864657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
73964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
74264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
74364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
74464657d84SAmlan Barua #else
745e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
74664657d84SAmlan Barua #endif
7473c3a9c75SAmlan Barua       break;
7483c3a9c75SAmlan Barua     case 2:
749bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7507a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
75126fbe8dcSKarl Rupp 
7524f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7534f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
754bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
755bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
757bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
758bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
759bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
760bd59e6c6SAmlan Barua #else
761c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
76226fbe8dcSKarl Rupp 
763854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
764854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7653c3a9c75SAmlan Barua 
7663564f093SHong Zhang       if (dim[1]%2==0) {
7673c3a9c75SAmlan Barua         NM = dim[1]+2;
7683564f093SHong Zhang       } else {
7693c3a9c75SAmlan Barua         NM = dim[1]+1;
7703564f093SHong Zhang       }
7713c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7723c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7733c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7743c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
77526fbe8dcSKarl Rupp 
7765b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7773c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7783c3a9c75SAmlan Barua         }
7793c3a9c75SAmlan Barua       }
7803c3a9c75SAmlan Barua 
7813c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7823c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7833c3a9c75SAmlan Barua 
784f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
785f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
786f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
787f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
788b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
789b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
790b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
791b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua #endif
7939496c9aeSAmlan Barua       break;
7943c3a9c75SAmlan Barua 
7953c3a9c75SAmlan Barua     case 3:
796bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7977a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
79826fbe8dcSKarl Rupp 
7994f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
8004f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
801bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
802bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
803bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
804bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
805bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
806bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
807bd59e6c6SAmlan Barua #else
808f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
809f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
8107a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
81126fbe8dcSKarl Rupp 
812854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
813854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
81451d1eed7SAmlan Barua 
815db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
816db4deed7SKarl Rupp       else             NM = dim[2]+1;
81751d1eed7SAmlan Barua 
81851d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
81951d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
82051d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
82151d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
82251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
82326fbe8dcSKarl Rupp 
82451d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
82551d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
82651d1eed7SAmlan Barua           }
82751d1eed7SAmlan Barua         }
82851d1eed7SAmlan Barua       }
82951d1eed7SAmlan Barua 
83051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
83151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
83251d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
83351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
83451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
83551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
836b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
837b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
838b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
839b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
840bd59e6c6SAmlan Barua #endif
8419496c9aeSAmlan Barua       break;
8423c3a9c75SAmlan Barua 
8433c3a9c75SAmlan Barua     default:
844bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8457a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
84626fbe8dcSKarl Rupp 
8474f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8484f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
849bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
850bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
851bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
852bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
853bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
854bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
855bd59e6c6SAmlan Barua #else
856f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
857f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
858e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
85926fbe8dcSKarl Rupp 
860e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
86126fbe8dcSKarl Rupp 
8627a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
86326fbe8dcSKarl Rupp 
864e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
865e5d7f247SAmlan Barua 
866e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
867e5d7f247SAmlan Barua 
868854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
869854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
870e5d7f247SAmlan Barua 
871db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
872db4deed7SKarl Rupp       else                  NM = 1;
873e5d7f247SAmlan Barua 
8746971673cSAmlan Barua       j = low;
8753564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8766971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8776971673cSAmlan Barua         indx2[i] = j;
87826fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8796971673cSAmlan Barua         j++;
8806971673cSAmlan Barua       }
8816971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8826971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8836971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8846971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8856971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8866971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
887b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
888b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
889b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
890b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
891bd59e6c6SAmlan Barua #endif
8929496c9aeSAmlan Barua       break;
8933c3a9c75SAmlan Barua     }
894e81bb599SAmlan Barua   }
8953564f093SHong Zhang   PetscFunctionReturn(0);
8963c3a9c75SAmlan Barua }
8973c3a9c75SAmlan Barua 
8983c3a9c75SAmlan Barua #undef __FUNCT__
89974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
9003564f093SHong Zhang /*@
9013564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
9023564f093SHong Zhang 
9033564f093SHong Zhang    Collective on Mat
9043564f093SHong Zhang 
9053564f093SHong Zhang     Input Parameters:
9063564f093SHong Zhang +   A - FFTW matrix
9073564f093SHong Zhang -   x - FFTW vector
9083564f093SHong Zhang 
9093564f093SHong Zhang    Output Parameters:
9103564f093SHong Zhang .  y - PETSc vector
9113564f093SHong Zhang 
9123564f093SHong Zhang    Level: intermediate
9133564f093SHong Zhang 
9143564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9153564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9163564f093SHong Zhang 
9173564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9183564f093SHong Zhang @*/
91974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9203c3a9c75SAmlan Barua {
9213c3a9c75SAmlan Barua   PetscErrorCode ierr;
9225fd66863SKarl Rupp 
9233c3a9c75SAmlan Barua   PetscFunctionBegin;
924*163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9253c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9263c3a9c75SAmlan Barua }
92754efbe56SHong Zhang 
9283c3a9c75SAmlan Barua #undef __FUNCT__
92974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
93074a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9315b3e41ffSAmlan Barua {
9325b3e41ffSAmlan Barua   PetscErrorCode ierr;
933ce94432eSBarry Smith   MPI_Comm       comm;
9345b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9355b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9369496c9aeSAmlan Barua   PetscInt       N     = fft->N;
937b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9389496c9aeSAmlan Barua   PetscInt       low;
9397a21806cSHong Zhang   PetscMPIInt    rank,size;
9407a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9419496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9425b3e41ffSAmlan Barua   VecScatter     vecscat;
9435b3e41ffSAmlan Barua   IS             list1,list2;
9449496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9459496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
9469496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
947a31b9140SHong Zhang   PetscInt       NM;
9489496c9aeSAmlan Barua   ptrdiff_t      temp;
9499496c9aeSAmlan Barua #endif
9509496c9aeSAmlan Barua 
9513564f093SHong Zhang   PetscFunctionBegin;
952ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9535b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9545b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9550298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9565b3e41ffSAmlan Barua 
957e81bb599SAmlan Barua   if (size==1) {
958b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9596971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9606971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9616971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9626971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
963b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
964e81bb599SAmlan Barua 
9653564f093SHong Zhang   } else {
966e81bb599SAmlan Barua     switch (ndim) {
967e81bb599SAmlan Barua     case 1:
96864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9697a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
97026fbe8dcSKarl Rupp 
9714f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9724f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
97364657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
97464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
97764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
97864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
97964657d84SAmlan Barua #else
9806a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
98164657d84SAmlan Barua #endif
9825b3e41ffSAmlan Barua       break;
9835b3e41ffSAmlan Barua     case 2:
984bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9857a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
98626fbe8dcSKarl Rupp 
9874f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9884f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
989bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
990bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua #else
9967a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
99726fbe8dcSKarl Rupp 
998854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
999854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
10005b3e41ffSAmlan Barua 
1001db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
1002db4deed7SKarl Rupp       else             NM = dim[1]+1;
10035b3e41ffSAmlan Barua 
10045b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
10055b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
10065b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
10075b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
100826fbe8dcSKarl Rupp 
10095b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10105b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10115b3e41ffSAmlan Barua         }
10125b3e41ffSAmlan Barua       }
10135b3e41ffSAmlan Barua 
10145b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10155b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10165b3e41ffSAmlan Barua 
10175b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10185b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10195b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10205b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1021b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1022b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1023b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1024b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua #endif
10269496c9aeSAmlan Barua       break;
10275b3e41ffSAmlan Barua     case 3:
1028bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10297a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
103026fbe8dcSKarl Rupp 
10314f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10324f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1033bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1034bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1037bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1038bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1039bd59e6c6SAmlan Barua #else
10407a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
104126fbe8dcSKarl Rupp 
1042854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1043854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
104451d1eed7SAmlan Barua 
1045db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1046db4deed7SKarl Rupp       else             NM = dim[2]+1;
104751d1eed7SAmlan Barua 
104851d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
104951d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
105051d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
105151d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
105251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
105326fbe8dcSKarl Rupp 
105451d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
105551d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
105651d1eed7SAmlan Barua           }
105751d1eed7SAmlan Barua         }
105851d1eed7SAmlan Barua       }
105951d1eed7SAmlan Barua 
106051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
106151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
106251d1eed7SAmlan Barua 
106351d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
106451d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
106551d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
106651d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1067b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1068b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1069b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1070b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1071bd59e6c6SAmlan Barua #endif
10729496c9aeSAmlan Barua       break;
10735b3e41ffSAmlan Barua     default:
1074bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10757a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
107626fbe8dcSKarl Rupp 
10774f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10784f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1079bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1080bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1083bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1084bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1085bd59e6c6SAmlan Barua #else
1086ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
108726fbe8dcSKarl Rupp 
1088ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
108926fbe8dcSKarl Rupp 
1090c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
109126fbe8dcSKarl Rupp 
1092ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1093ba6e06d0SAmlan Barua 
1094ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1095ba6e06d0SAmlan Barua 
1096854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1097854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1098ba6e06d0SAmlan Barua 
1099db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1100db4deed7SKarl Rupp       else                  NM = 1;
1101ba6e06d0SAmlan Barua 
1102ba6e06d0SAmlan Barua       j = low;
11033564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1104ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1105ba6e06d0SAmlan Barua         indx2[i] = j;
11063564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1107ba6e06d0SAmlan Barua         j++;
1108ba6e06d0SAmlan Barua       }
1109ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1110ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1111ba6e06d0SAmlan Barua 
1112ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1113ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1116b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1117b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1118b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1119b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1120bd59e6c6SAmlan Barua #endif
11219496c9aeSAmlan Barua       break;
11225b3e41ffSAmlan Barua     }
1123e81bb599SAmlan Barua   }
11243564f093SHong Zhang   PetscFunctionReturn(0);
11255b3e41ffSAmlan Barua }
11265b3e41ffSAmlan Barua 
11275b3e41ffSAmlan Barua #undef __FUNCT__
11283c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11293c3a9c75SAmlan Barua /*
11303564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11313564f093SHong Zhang 
11323c3a9c75SAmlan Barua   Options Database Keys:
11333c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11343c3a9c75SAmlan Barua 
11353c3a9c75SAmlan Barua    Level: intermediate
11363c3a9c75SAmlan Barua 
11373c3a9c75SAmlan Barua */
11388cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1139b2b77a04SHong Zhang {
1140b2b77a04SHong Zhang   PetscErrorCode ierr;
1141ce94432eSBarry Smith   MPI_Comm       comm;
1142b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1143b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1144b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11455274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11465274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1147b2b77a04SHong Zhang   PetscBool      flg;
11484f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
114911902ff2SHong Zhang   PetscMPIInt    size,rank;
11509496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11519496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11529496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11539496c9aeSAmlan Barua   ptrdiff_t      temp;
11548ca4c034SAmlan Barua   PetscInt       N1,tot_dim;
11554f48bc67SAmlan Barua #else
11564f48bc67SAmlan Barua   PetscInt       n1;
11579496c9aeSAmlan Barua #endif
11589496c9aeSAmlan Barua 
1159b2b77a04SHong Zhang   PetscFunctionBegin;
1160ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1161b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
116211902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1163c92418ccSAmlan Barua 
11641878d478SAmlan Barua   fftw_mpi_init();
116511902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
116611902ff2SHong Zhang   pdim[0] = dim[0];
11678ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11688ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11698ca4c034SAmlan Barua #endif
11703564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11716882372aSHong Zhang     partial_dim *= dim[ctr];
117211902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11738ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1174db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1175db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11768ca4c034SAmlan Barua #endif
11776882372aSHong Zhang   }
11783c3a9c75SAmlan Barua 
1179b2b77a04SHong Zhang   if (size == 1) {
1180e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1181b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1182b2b77a04SHong Zhang     n    = N;
1183e81bb599SAmlan Barua #else
1184e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1185e81bb599SAmlan Barua     n    = tot_dim;
1186e81bb599SAmlan Barua #endif
1187e81bb599SAmlan Barua 
1188b2b77a04SHong Zhang   } else {
11897a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1190b2b77a04SHong Zhang     switch (ndim) {
1191b2b77a04SHong Zhang     case 1:
11923c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11933c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1194e5d7f247SAmlan Barua #else
11957a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
119626fbe8dcSKarl Rupp 
11976882372aSHong Zhang       n    = (PetscInt)local_n0;
11989496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11999496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1200e5d7f247SAmlan Barua #endif
1201b2b77a04SHong Zhang       break;
1202b2b77a04SHong Zhang     case 2:
12035b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12047a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1205b2b77a04SHong Zhang       /*
1206b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
12070ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1208b2b77a04SHong Zhang        */
1209b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1210b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12115b3e41ffSAmlan Barua #else
1212c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
121326fbe8dcSKarl Rupp 
12145b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1215c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12165b3e41ffSAmlan Barua #endif
1217b2b77a04SHong Zhang       break;
1218b2b77a04SHong Zhang     case 3:
121951d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12207a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
122126fbe8dcSKarl Rupp 
12226882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12236882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
122451d1eed7SAmlan Barua #else
1225c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
122626fbe8dcSKarl Rupp 
122751d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1228c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
122951d1eed7SAmlan Barua #endif
1230b2b77a04SHong Zhang       break;
1231b2b77a04SHong Zhang     default:
1232b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12337a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
123426fbe8dcSKarl Rupp 
12356882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12366882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1237b3a17365SAmlan Barua #else
1238b3a17365SAmlan Barua       temp = pdim[ndim-1];
123926fbe8dcSKarl Rupp 
1240b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
124126fbe8dcSKarl Rupp 
1242c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
124326fbe8dcSKarl Rupp 
1244b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1245b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
124626fbe8dcSKarl Rupp 
1247b3a17365SAmlan Barua       pdim[ndim-1] = temp;
124826fbe8dcSKarl Rupp 
1249c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1250b3a17365SAmlan Barua #endif
1251b2b77a04SHong Zhang       break;
1252b2b77a04SHong Zhang     }
1253b2b77a04SHong Zhang   }
1254b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1255b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1256b2b77a04SHong Zhang   fft->data = (void*)fftw;
1257b2b77a04SHong Zhang 
1258b2b77a04SHong Zhang   fft->n            = n;
12590c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1260e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
126126fbe8dcSKarl Rupp 
12625e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12638c1d5ab3SHong Zhang   if (size == 1) {
1264a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1265a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1266a1b6d50cSHong Zhang #else
12678c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1268a1b6d50cSHong Zhang #endif
12698c1d5ab3SHong Zhang   }
127026fbe8dcSKarl Rupp 
1271b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1272c92418ccSAmlan Barua 
1273b2b77a04SHong Zhang   fftw->p_forward  = 0;
1274b2b77a04SHong Zhang   fftw->p_backward = 0;
1275b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1276b2b77a04SHong Zhang 
1277b2b77a04SHong Zhang   if (size == 1) {
1278b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1279b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1280b2b77a04SHong Zhang   } else {
1281b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1282b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1283b2b77a04SHong Zhang   }
1284b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1285b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12864a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
128726fbe8dcSKarl Rupp 
12882a7a6963SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1289bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1290bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1291b2b77a04SHong Zhang 
1292b2b77a04SHong Zhang   /* get runtime options */
1293ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12945274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12955274ed1bSHong Zhang   if (flg) {
12965274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12975274ed1bSHong Zhang   }
12984a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1299b2b77a04SHong Zhang   PetscFunctionReturn(0);
1300b2b77a04SHong Zhang }
13013c3a9c75SAmlan Barua 
13023c3a9c75SAmlan Barua 
13033c3a9c75SAmlan Barua 
13043c3a9c75SAmlan Barua 
1305