xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 1acd80e4de76bcd495fa62e8872e763f223ce968)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt     partial_dim;
20b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32b2b77a04SHong Zhang 
3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3497504071SAmlan Barua    Input parameter:
353564f093SHong Zhang      A - the matrix
3697504071SAmlan Barua      x - the vector on which FDFT will be performed
3797504071SAmlan Barua 
3897504071SAmlan Barua    Output parameter:
3997504071SAmlan Barua      y - vector that stores result of FDFT
4097504071SAmlan Barua */
41b2b77a04SHong Zhang #undef __FUNCT__
42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44b2b77a04SHong Zhang {
45b2b77a04SHong Zhang   PetscErrorCode ierr;
46b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
49*1acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
50a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
51a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
52a1b6d50cSHong Zhang #else
538c1d5ab3SHong Zhang   fftw_iodim     *iodims;
54a1b6d50cSHong Zhang #endif
55*1acd80e4SHong Zhang   PetscInt       i;
56*1acd80e4SHong Zhang #endif
57*1acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
58b2b77a04SHong Zhang 
59b2b77a04SHong Zhang   PetscFunctionBegin;
60b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
61b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
62b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
63b2b77a04SHong Zhang     switch (ndim) {
64b2b77a04SHong Zhang     case 1:
6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
66b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6758a912c5SAmlan Barua #else
6858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6958a912c5SAmlan Barua #endif
70b2b77a04SHong Zhang       break;
71b2b77a04SHong Zhang     case 2:
7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
73b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7458a912c5SAmlan Barua #else
7558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7658a912c5SAmlan Barua #endif
77b2b77a04SHong Zhang       break;
78b2b77a04SHong Zhang     case 3:
7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
80b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8158a912c5SAmlan Barua #else
8258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8358a912c5SAmlan Barua #endif
84b2b77a04SHong Zhang       break;
85b2b77a04SHong Zhang     default:
8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87a1b6d50cSHong Zhang       iodims = fftw->iodims;
88a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
898c1d5ab3SHong Zhang       if (ndim) {
90a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
918c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
928c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
93a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
948c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
958c1d5ab3SHong Zhang         }
968c1d5ab3SHong Zhang       }
97a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98a1b6d50cSHong Zhang #else
99a1b6d50cSHong Zhang       if (ndim) {
100a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
101a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
102a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
103a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
104a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105a1b6d50cSHong Zhang         }
106a1b6d50cSHong Zhang       }
107a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108a1b6d50cSHong Zhang #endif
109a1b6d50cSHong Zhang 
11058a912c5SAmlan Barua #else
11158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11258a912c5SAmlan Barua #endif
113b2b77a04SHong Zhang       break;
114b2b77a04SHong Zhang     }
115b2b77a04SHong Zhang     fftw->finarray  = x_array;
116b2b77a04SHong Zhang     fftw->foutarray = y_array;
117b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
118b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
119b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
120b2b77a04SHong Zhang   } else { /* use existing plan */
121b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
123b2b77a04SHong Zhang     } else {
124b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
125b2b77a04SHong Zhang     }
126b2b77a04SHong Zhang   }
127b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
128b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
129b2b77a04SHong Zhang   PetscFunctionReturn(0);
130b2b77a04SHong Zhang }
131b2b77a04SHong Zhang 
13297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13397504071SAmlan Barua    Input parameter:
1343564f093SHong Zhang      A - the matrix
13597504071SAmlan Barua      x - the vector on which BDFT will be performed
13697504071SAmlan Barua 
13797504071SAmlan Barua    Output parameter:
13897504071SAmlan Barua      y - vector that stores result of BDFT
13997504071SAmlan Barua */
14097504071SAmlan Barua 
141b2b77a04SHong Zhang #undef __FUNCT__
142b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
143b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
144b2b77a04SHong Zhang {
145b2b77a04SHong Zhang   PetscErrorCode ierr;
146b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
147b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
148b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
149b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
150*1acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
151a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
152a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
153a1b6d50cSHong Zhang #else
154a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
155a1b6d50cSHong Zhang #endif
156*1acd80e4SHong Zhang #endif
157b2b77a04SHong Zhang 
158b2b77a04SHong Zhang   PetscFunctionBegin;
159b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
160b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
161b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
162b2b77a04SHong Zhang     switch (ndim) {
163b2b77a04SHong Zhang     case 1:
16458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
165b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16658a912c5SAmlan Barua #else
167e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
16858a912c5SAmlan Barua #endif
169b2b77a04SHong Zhang       break;
170b2b77a04SHong Zhang     case 2:
17158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
172b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17358a912c5SAmlan Barua #else
174e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17558a912c5SAmlan Barua #endif
176b2b77a04SHong Zhang       break;
177b2b77a04SHong Zhang     case 3:
17858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
179b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18058a912c5SAmlan Barua #else
181e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18258a912c5SAmlan Barua #endif
183b2b77a04SHong Zhang       break;
184b2b77a04SHong Zhang     default:
18558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
186a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
187a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
188a1b6d50cSHong Zhang #else
1898c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190a1b6d50cSHong Zhang #endif
19158a912c5SAmlan Barua #else
192e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19358a912c5SAmlan Barua #endif
194b2b77a04SHong Zhang       break;
195b2b77a04SHong Zhang     }
196b2b77a04SHong Zhang     fftw->binarray  = x_array;
197b2b77a04SHong Zhang     fftw->boutarray = y_array;
198b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
21197504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
21297504071SAmlan Barua    Input parameter:
2133564f093SHong Zhang      A - the matrix
21497504071SAmlan Barua      x - the vector on which FDFT will be performed
21597504071SAmlan Barua 
21697504071SAmlan Barua    Output parameter:
21797504071SAmlan Barua    y   - vector that stores result of FDFT
21897504071SAmlan Barua */
219b2b77a04SHong Zhang #undef __FUNCT__
220b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
221b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
222b2b77a04SHong Zhang {
223b2b77a04SHong Zhang   PetscErrorCode ierr;
224b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
225b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
226b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
227c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
228ce94432eSBarry Smith   MPI_Comm       comm;
229b2b77a04SHong Zhang 
230b2b77a04SHong Zhang   PetscFunctionBegin;
231ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
232b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
233b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
234b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
235b2b77a04SHong Zhang     switch (ndim) {
236b2b77a04SHong Zhang     case 1:
2373c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
238b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
239ae0a50aaSHong Zhang #else
2404f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2413c3a9c75SAmlan Barua #endif
242b2b77a04SHong Zhang       break;
243b2b77a04SHong Zhang     case 2:
24497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
245b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2463c3a9c75SAmlan Barua #else
2473c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2483c3a9c75SAmlan Barua #endif
249b2b77a04SHong Zhang       break;
250b2b77a04SHong Zhang     case 3:
2513c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
252b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2533c3a9c75SAmlan Barua #else
25451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2553c3a9c75SAmlan Barua #endif
256b2b77a04SHong Zhang       break;
257b2b77a04SHong Zhang     default:
2583c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
259c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2603c3a9c75SAmlan Barua #else
2613c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2623c3a9c75SAmlan Barua #endif
263b2b77a04SHong Zhang       break;
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang     fftw->finarray  = x_array;
266b2b77a04SHong Zhang     fftw->foutarray = y_array;
267b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
268b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
269b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
270b2b77a04SHong Zhang   } else { /* use existing plan */
271b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
272b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
273b2b77a04SHong Zhang     } else {
274b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
275b2b77a04SHong Zhang     }
276b2b77a04SHong Zhang   }
277b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
278b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
279b2b77a04SHong Zhang   PetscFunctionReturn(0);
280b2b77a04SHong Zhang }
281b2b77a04SHong Zhang 
28297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
28397504071SAmlan Barua    Input parameter:
2843564f093SHong Zhang      A - the matrix
28597504071SAmlan Barua      x - the vector on which BDFT will be performed
28697504071SAmlan Barua 
28797504071SAmlan Barua    Output parameter:
28897504071SAmlan Barua      y - vector that stores result of BDFT
28997504071SAmlan Barua */
290b2b77a04SHong Zhang #undef __FUNCT__
291b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
292b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
293b2b77a04SHong Zhang {
294b2b77a04SHong Zhang   PetscErrorCode ierr;
295b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
296b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
298c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299ce94432eSBarry Smith   MPI_Comm       comm;
300b2b77a04SHong Zhang 
301b2b77a04SHong Zhang   PetscFunctionBegin;
302ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
303b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
304b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
305b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
306b2b77a04SHong Zhang     switch (ndim) {
307b2b77a04SHong Zhang     case 1:
3083c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
309b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
310ae0a50aaSHong Zhang #else
3114f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3123c3a9c75SAmlan Barua #endif
313b2b77a04SHong Zhang       break;
314b2b77a04SHong Zhang     case 2:
31597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
316b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3173c3a9c75SAmlan Barua #else
3183c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3193c3a9c75SAmlan Barua #endif
320b2b77a04SHong Zhang       break;
321b2b77a04SHong Zhang     case 3:
3223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
323b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3243c3a9c75SAmlan Barua #else
3253c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3263c3a9c75SAmlan Barua #endif
327b2b77a04SHong Zhang       break;
328b2b77a04SHong Zhang     default:
3293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
330c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3313c3a9c75SAmlan Barua #else
3323c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3333c3a9c75SAmlan Barua #endif
334b2b77a04SHong Zhang       break;
335b2b77a04SHong Zhang     }
336b2b77a04SHong Zhang     fftw->binarray  = x_array;
337b2b77a04SHong Zhang     fftw->boutarray = y_array;
338b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339b2b77a04SHong Zhang   } else { /* use existing plan */
340b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
341b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
342b2b77a04SHong Zhang     } else {
343b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
344b2b77a04SHong Zhang     }
345b2b77a04SHong Zhang   }
346b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
347b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
348b2b77a04SHong Zhang   PetscFunctionReturn(0);
349b2b77a04SHong Zhang }
350b2b77a04SHong Zhang 
351b2b77a04SHong Zhang #undef __FUNCT__
352b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
353b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
354b2b77a04SHong Zhang {
355b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
356b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
357b2b77a04SHong Zhang   PetscErrorCode ierr;
358b2b77a04SHong Zhang 
359b2b77a04SHong Zhang   PetscFunctionBegin;
360b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
361b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3628c1d5ab3SHong Zhang   if (fftw->iodims) {
3638c1d5ab3SHong Zhang     free(fftw->iodims);
3648c1d5ab3SHong Zhang   }
365bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
366bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3676ccf0b3eSHong Zhang   fftw_mpi_cleanup();
368b2b77a04SHong Zhang   PetscFunctionReturn(0);
369b2b77a04SHong Zhang }
370b2b77a04SHong Zhang 
371c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
372b2b77a04SHong Zhang #undef __FUNCT__
373b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
374b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375b2b77a04SHong Zhang {
376b2b77a04SHong Zhang   PetscErrorCode ierr;
377b2b77a04SHong Zhang   PetscScalar    *array;
378b2b77a04SHong Zhang 
379b2b77a04SHong Zhang   PetscFunctionBegin;
380b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384b2b77a04SHong Zhang   PetscFunctionReturn(0);
385b2b77a04SHong Zhang }
386b2b77a04SHong Zhang 
3874f7415efSAmlan Barua #undef __FUNCT__
3884be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3894be45526SHong Zhang /*@
390b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3914f7415efSAmlan Barua      parallel layout determined by FFTW
3924f7415efSAmlan Barua 
3934f7415efSAmlan Barua    Collective on Mat
3944f7415efSAmlan Barua 
3954f7415efSAmlan Barua    Input Parameter:
3963564f093SHong Zhang .   A - the matrix
3974f7415efSAmlan Barua 
3984f7415efSAmlan Barua    Output Parameter:
399cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
400cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
401cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4024f7415efSAmlan Barua 
4034f7415efSAmlan Barua   Level: advanced
4043564f093SHong Zhang 
40597504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
40697504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
40797504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
40897504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
40997504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
41097504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
411e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
412e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
413e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
414e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
415e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
416e0ec536eSAmlan Barua         each processor and returns that.
4174f7415efSAmlan Barua 
4184f7415efSAmlan Barua .seealso: MatCreateFFTW()
4194be45526SHong Zhang @*/
4204be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4214be45526SHong Zhang {
4224be45526SHong Zhang   PetscErrorCode ierr;
423b4c3921fSHong Zhang 
4244be45526SHong Zhang   PetscFunctionBegin;
4254be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4264be45526SHong Zhang   PetscFunctionReturn(0);
4274be45526SHong Zhang }
4284be45526SHong Zhang 
4294be45526SHong Zhang #undef __FUNCT__
4304be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4314be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4324f7415efSAmlan Barua {
4334f7415efSAmlan Barua   PetscErrorCode ierr;
4344f7415efSAmlan Barua   PetscMPIInt    size,rank;
435ce94432eSBarry Smith   MPI_Comm       comm;
4364f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4374f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4389496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4394f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4404f7415efSAmlan Barua 
4414f7415efSAmlan Barua   PetscFunctionBegin;
4424f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4434f7415efSAmlan Barua   PetscValidType(A,1);
444ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4454f7415efSAmlan Barua 
4464f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4474f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4484f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4494f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4504f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4514f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4524f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4534f7415efSAmlan Barua #else
4548ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4558ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4568ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4574f7415efSAmlan Barua #endif
45897504071SAmlan Barua   } else { /* parallel cases */
4594f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4609496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4619496c9aeSAmlan Barua     fftw_complex *data_fout;
4629496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4639496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4649496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4659496c9aeSAmlan Barua #else
4664f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4676ccf0b3eSHong Zhang     PetscInt  n1,N1;
4689496c9aeSAmlan Barua     ptrdiff_t temp;
4699496c9aeSAmlan Barua #endif
4709496c9aeSAmlan Barua 
4714f7415efSAmlan Barua     switch (ndim) {
4724f7415efSAmlan Barua     case 1:
47357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4744f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
47557625b84SAmlan Barua #else
47657625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
47757625b84SAmlan Barua       if (fin) {
47857625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
48026fbe8dcSKarl Rupp 
48157625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
48257625b84SAmlan Barua       }
48357625b84SAmlan Barua       if (fout) {
48457625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
485778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48626fbe8dcSKarl Rupp 
48757625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
48857625b84SAmlan Barua       }
48957625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
49057625b84SAmlan Barua       if (bout) {
49157625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
49326fbe8dcSKarl Rupp 
49457625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
49557625b84SAmlan Barua       }
49657625b84SAmlan Barua       break;
49757625b84SAmlan Barua #endif
4984f7415efSAmlan Barua     case 2:
49997504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5004f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5014f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5024f7415efSAmlan Barua       if (fin) {
5034f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
504778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
50526fbe8dcSKarl Rupp 
5064f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5074f7415efSAmlan Barua       }
5084f7415efSAmlan Barua       if (bout) {
5094f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
510778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
51126fbe8dcSKarl Rupp 
5124f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5134f7415efSAmlan Barua       }
51457625b84SAmlan Barua       if (fout) {
51557625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
516778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51726fbe8dcSKarl Rupp 
51857625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
51957625b84SAmlan Barua       }
5204f7415efSAmlan Barua #else
5214f7415efSAmlan Barua       /* Get local size */
5224f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5234f7415efSAmlan Barua       if (fin) {
5244f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
525778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
52626fbe8dcSKarl Rupp 
5274f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5284f7415efSAmlan Barua       }
5294f7415efSAmlan Barua       if (fout) {
5304f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
53226fbe8dcSKarl Rupp 
5334f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5344f7415efSAmlan Barua       }
5354f7415efSAmlan Barua       if (bout) {
5364f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
53826fbe8dcSKarl Rupp 
5394f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5404f7415efSAmlan Barua       }
5414f7415efSAmlan Barua #endif
5424f7415efSAmlan Barua       break;
5434f7415efSAmlan Barua     case 3:
5444f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5454f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5464f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5474f7415efSAmlan Barua       if (fin) {
5484f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
549778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
55026fbe8dcSKarl Rupp 
5514f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5524f7415efSAmlan Barua       }
5534f7415efSAmlan Barua       if (bout) {
5544f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
555778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
55626fbe8dcSKarl Rupp 
5574f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5584f7415efSAmlan Barua       }
5593564f093SHong Zhang 
56057625b84SAmlan Barua       if (fout) {
56157625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
562778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
56326fbe8dcSKarl Rupp 
56457625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56557625b84SAmlan Barua       }
5664f7415efSAmlan Barua #else
5670c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5680c9b18e4SAmlan Barua       if (fin) {
5690c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
57126fbe8dcSKarl Rupp 
5720c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5730c9b18e4SAmlan Barua       }
5740c9b18e4SAmlan Barua       if (fout) {
5750c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
576778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57726fbe8dcSKarl Rupp 
5780c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5790c9b18e4SAmlan Barua       }
5800c9b18e4SAmlan Barua       if (bout) {
5810c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
582778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
58326fbe8dcSKarl Rupp 
5840c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5850c9b18e4SAmlan Barua       }
5864f7415efSAmlan Barua #endif
5874f7415efSAmlan Barua       break;
5884f7415efSAmlan Barua     default:
5894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5904f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
59126fbe8dcSKarl Rupp 
5924f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
59326fbe8dcSKarl Rupp 
5944f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5954f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
59626fbe8dcSKarl Rupp 
5974f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5984f7415efSAmlan Barua 
5994f7415efSAmlan Barua       if (fin) {
6004f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
601778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
60226fbe8dcSKarl Rupp 
6034f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6044f7415efSAmlan Barua       }
6054f7415efSAmlan Barua       if (bout) {
6064f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
607778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
60826fbe8dcSKarl Rupp 
6099496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6104f7415efSAmlan Barua       }
6113564f093SHong Zhang 
61257625b84SAmlan Barua       if (fout) {
61357625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
614778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
61526fbe8dcSKarl Rupp 
61657625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
61757625b84SAmlan Barua       }
6184f7415efSAmlan Barua #else
6190c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6200c9b18e4SAmlan Barua       if (fin) {
6210c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
622778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
62326fbe8dcSKarl Rupp 
6240c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6250c9b18e4SAmlan Barua       }
6260c9b18e4SAmlan Barua       if (fout) {
6270c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
628778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
62926fbe8dcSKarl Rupp 
6300c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6320c9b18e4SAmlan Barua       if (bout) {
6330c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
634778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
63526fbe8dcSKarl Rupp 
6360c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6370c9b18e4SAmlan Barua       }
6384f7415efSAmlan Barua #endif
6394f7415efSAmlan Barua       break;
6404f7415efSAmlan Barua     }
6414f7415efSAmlan Barua   }
6424f7415efSAmlan Barua   PetscFunctionReturn(0);
6434f7415efSAmlan Barua }
6444f7415efSAmlan Barua 
645c92418ccSAmlan Barua #undef __FUNCT__
64674a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6473564f093SHong Zhang /*@
6483564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
64954efbe56SHong Zhang 
6503564f093SHong Zhang    Collective on Mat
6513564f093SHong Zhang 
6523564f093SHong Zhang    Input Parameters:
6533564f093SHong Zhang +  A - FFTW matrix
6543564f093SHong Zhang -  x - the PETSc vector
6553564f093SHong Zhang 
6563564f093SHong Zhang    Output Parameters:
6573564f093SHong Zhang .  y - the FFTW vector
6583564f093SHong Zhang 
659b2b77a04SHong Zhang   Options Database Keys:
6603564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
661b2b77a04SHong Zhang 
662b2b77a04SHong Zhang    Level: intermediate
6633564f093SHong Zhang 
66497504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
66597504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
66697504071SAmlan Barua          zeros. This routine does that job by scattering operation.
66797504071SAmlan Barua 
6683564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6693564f093SHong Zhang @*/
6703564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6713564f093SHong Zhang {
6723564f093SHong Zhang   PetscErrorCode ierr;
673b2b77a04SHong Zhang 
6743564f093SHong Zhang   PetscFunctionBegin;
6753564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6763564f093SHong Zhang   PetscFunctionReturn(0);
6773564f093SHong Zhang }
6783c3a9c75SAmlan Barua 
6793c3a9c75SAmlan Barua #undef __FUNCT__
6801986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
68174a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6823c3a9c75SAmlan Barua {
6833c3a9c75SAmlan Barua   PetscErrorCode ierr;
684ce94432eSBarry Smith   MPI_Comm       comm;
6853c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6863c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6879496c9aeSAmlan Barua   PetscInt       N     =fft->N;
688b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6899496c9aeSAmlan Barua   PetscInt       low;
6907a21806cSHong Zhang   PetscMPIInt    rank,size;
6917a21806cSHong Zhang   PetscInt       vsize,vsize1;
6927a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6939496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6943c3a9c75SAmlan Barua   VecScatter     vecscat;
6953c3a9c75SAmlan Barua   IS             list1,list2;
6969496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6979496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6989496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6999496c9aeSAmlan Barua   PetscInt       N1,n1,NM;
7009496c9aeSAmlan Barua   ptrdiff_t      temp;
7019496c9aeSAmlan Barua #endif
7023c3a9c75SAmlan Barua 
7033564f093SHong Zhang   PetscFunctionBegin;
704ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
705f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
706f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
7070298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
7083c3a9c75SAmlan Barua 
7093564f093SHong Zhang   if (size==1) {
7108ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7118ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7129496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7136971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7146971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7156971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7166971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
717b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7183564f093SHong Zhang   } else {
7193c3a9c75SAmlan Barua     switch (ndim) {
7203c3a9c75SAmlan Barua     case 1:
72164657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7227a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
72326fbe8dcSKarl Rupp 
7244f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7254f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
72664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
72764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
73064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
73164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
73264657d84SAmlan Barua #else
733e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
73464657d84SAmlan Barua #endif
7353c3a9c75SAmlan Barua       break;
7363c3a9c75SAmlan Barua     case 2:
737bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7387a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
73926fbe8dcSKarl Rupp 
7404f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7414f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
742bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
743bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
745bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
746bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
747bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
748bd59e6c6SAmlan Barua #else
749c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
75026fbe8dcSKarl Rupp 
7513c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7529496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7539496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7543c3a9c75SAmlan Barua 
7553564f093SHong Zhang       if (dim[1]%2==0) {
7563c3a9c75SAmlan Barua         NM = dim[1]+2;
7573564f093SHong Zhang       } else {
7583c3a9c75SAmlan Barua         NM = dim[1]+1;
7593564f093SHong Zhang       }
7603c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7613c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7623c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7633c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
76426fbe8dcSKarl Rupp 
7655b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7663c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7673c3a9c75SAmlan Barua         }
7683c3a9c75SAmlan Barua       }
7693c3a9c75SAmlan Barua 
7703c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7713c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7723c3a9c75SAmlan Barua 
773f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
774f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
776f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
777b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
778b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
779b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
780b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
781bd59e6c6SAmlan Barua #endif
7829496c9aeSAmlan Barua       break;
7833c3a9c75SAmlan Barua 
7843c3a9c75SAmlan Barua     case 3:
785bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7867a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
78726fbe8dcSKarl Rupp 
7884f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7894f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
790bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
791bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
793bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
794bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
795bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
796bd59e6c6SAmlan Barua #else
7977a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
79826fbe8dcSKarl Rupp 
79951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
80051d1eed7SAmlan Barua 
8019496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
8029496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
80351d1eed7SAmlan Barua 
804db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
805db4deed7SKarl Rupp       else             NM = dim[2]+1;
80651d1eed7SAmlan Barua 
80751d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
80851d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
80951d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
81051d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
81151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
81226fbe8dcSKarl Rupp 
81351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
81451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
81551d1eed7SAmlan Barua           }
81651d1eed7SAmlan Barua         }
81751d1eed7SAmlan Barua       }
81851d1eed7SAmlan Barua 
81951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
82051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
82151d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
82251d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
82351d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
82451d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
825b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
826b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
827b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
828b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
829bd59e6c6SAmlan Barua #endif
8309496c9aeSAmlan Barua       break;
8313c3a9c75SAmlan Barua 
8323c3a9c75SAmlan Barua     default:
833bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8347a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
83526fbe8dcSKarl Rupp 
8364f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8374f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
838bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
839bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
842bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
843bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
844bd59e6c6SAmlan Barua #else
845e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
84626fbe8dcSKarl Rupp 
847e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
84826fbe8dcSKarl Rupp 
8497a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
85026fbe8dcSKarl Rupp 
851e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
85226fbe8dcSKarl Rupp 
853e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
854e5d7f247SAmlan Barua 
855e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
856e5d7f247SAmlan Barua 
857e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
858e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
859e5d7f247SAmlan Barua 
860db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
861db4deed7SKarl Rupp       else                  NM = 1;
862e5d7f247SAmlan Barua 
8636971673cSAmlan Barua       j = low;
8643564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8656971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8666971673cSAmlan Barua         indx2[i] = j;
86726fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8686971673cSAmlan Barua         j++;
8696971673cSAmlan Barua       }
8706971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8716971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8726971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8736971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8746971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8756971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
876b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
877b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
878b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
879b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
880bd59e6c6SAmlan Barua #endif
8819496c9aeSAmlan Barua       break;
8823c3a9c75SAmlan Barua     }
883e81bb599SAmlan Barua   }
8843564f093SHong Zhang   PetscFunctionReturn(0);
8853c3a9c75SAmlan Barua }
8863c3a9c75SAmlan Barua 
8873c3a9c75SAmlan Barua #undef __FUNCT__
88874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8893564f093SHong Zhang /*@
8903564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8913564f093SHong Zhang 
8923564f093SHong Zhang    Collective on Mat
8933564f093SHong Zhang 
8943564f093SHong Zhang     Input Parameters:
8953564f093SHong Zhang +   A - FFTW matrix
8963564f093SHong Zhang -   x - FFTW vector
8973564f093SHong Zhang 
8983564f093SHong Zhang    Output Parameters:
8993564f093SHong Zhang .  y - PETSc vector
9003564f093SHong Zhang 
9013564f093SHong Zhang    Level: intermediate
9023564f093SHong Zhang 
9033564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9043564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9053564f093SHong Zhang 
9063564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9073564f093SHong Zhang @*/
90874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9093c3a9c75SAmlan Barua {
9103c3a9c75SAmlan Barua   PetscErrorCode ierr;
9115fd66863SKarl Rupp 
9123c3a9c75SAmlan Barua   PetscFunctionBegin;
91374a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9143c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9153c3a9c75SAmlan Barua }
91654efbe56SHong Zhang 
9173c3a9c75SAmlan Barua #undef __FUNCT__
91874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
91974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9205b3e41ffSAmlan Barua {
9215b3e41ffSAmlan Barua   PetscErrorCode ierr;
922ce94432eSBarry Smith   MPI_Comm       comm;
9235b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9245b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9259496c9aeSAmlan Barua   PetscInt       N     = fft->N;
926b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9279496c9aeSAmlan Barua   PetscInt       low;
9287a21806cSHong Zhang   PetscMPIInt    rank,size;
9297a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9309496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9315b3e41ffSAmlan Barua   VecScatter     vecscat;
9325b3e41ffSAmlan Barua   IS             list1,list2;
9339496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9349496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
9359496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
9369496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
9379496c9aeSAmlan Barua   ptrdiff_t      temp;
9389496c9aeSAmlan Barua #endif
9399496c9aeSAmlan Barua 
9403564f093SHong Zhang   PetscFunctionBegin;
941ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9425b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9435b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9440298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9455b3e41ffSAmlan Barua 
946e81bb599SAmlan Barua   if (size==1) {
947b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9486971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9496971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9506971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9516971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
952b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
953e81bb599SAmlan Barua 
9543564f093SHong Zhang   } else {
955e81bb599SAmlan Barua     switch (ndim) {
956e81bb599SAmlan Barua     case 1:
95764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9587a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
95926fbe8dcSKarl Rupp 
9604f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9614f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
96264657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
96364657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
96464657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
96564657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
96664657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
96764657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
96864657d84SAmlan Barua #else
9696a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
97064657d84SAmlan Barua #endif
9715b3e41ffSAmlan Barua       break;
9725b3e41ffSAmlan Barua     case 2:
973bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9747a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
97526fbe8dcSKarl Rupp 
9764f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9774f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
978bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
979bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
982bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
983bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
984bd59e6c6SAmlan Barua #else
9857a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
98626fbe8dcSKarl Rupp 
9875b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9885b3e41ffSAmlan Barua 
9899496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9909496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9915b3e41ffSAmlan Barua 
992db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
993db4deed7SKarl Rupp       else             NM = dim[1]+1;
9945b3e41ffSAmlan Barua 
9955b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9965b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9975b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9985b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
99926fbe8dcSKarl Rupp 
10005b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10015b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10025b3e41ffSAmlan Barua         }
10035b3e41ffSAmlan Barua       }
10045b3e41ffSAmlan Barua 
10055b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10065b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10075b3e41ffSAmlan Barua 
10085b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10095b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10105b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10115b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1012b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1013b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1014b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1015b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1016bd59e6c6SAmlan Barua #endif
10179496c9aeSAmlan Barua       break;
10185b3e41ffSAmlan Barua     case 3:
1019bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10207a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
102126fbe8dcSKarl Rupp 
10224f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10234f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1024bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua #else
1031bd59e6c6SAmlan Barua 
10327a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
103326fbe8dcSKarl Rupp 
103451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
103551d1eed7SAmlan Barua 
10369496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10379496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
103851d1eed7SAmlan Barua 
1039db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1040db4deed7SKarl Rupp       else             NM = dim[2]+1;
104151d1eed7SAmlan Barua 
104251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
104351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
104451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
104551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
104651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
104726fbe8dcSKarl Rupp 
104851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
104951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
105051d1eed7SAmlan Barua           }
105151d1eed7SAmlan Barua         }
105251d1eed7SAmlan Barua       }
105351d1eed7SAmlan Barua 
105451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
105551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
105651d1eed7SAmlan Barua 
105751d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
105851d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105951d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
106051d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1061b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1062b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1063b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1064b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1065bd59e6c6SAmlan Barua #endif
10669496c9aeSAmlan Barua       break;
10675b3e41ffSAmlan Barua     default:
1068bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10697a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
107026fbe8dcSKarl Rupp 
10714f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10724f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1073bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1074bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1077bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1078bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1079bd59e6c6SAmlan Barua #else
1080ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
108126fbe8dcSKarl Rupp 
1082ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
108326fbe8dcSKarl Rupp 
1084c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
108526fbe8dcSKarl Rupp 
1086ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
108726fbe8dcSKarl Rupp 
1088ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1089ba6e06d0SAmlan Barua 
1090ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1091ba6e06d0SAmlan Barua 
1092ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1093ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1094ba6e06d0SAmlan Barua 
1095db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1096db4deed7SKarl Rupp       else                  NM = 1;
1097ba6e06d0SAmlan Barua 
1098ba6e06d0SAmlan Barua       j = low;
10993564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1100ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1101ba6e06d0SAmlan Barua         indx2[i] = j;
11023564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1103ba6e06d0SAmlan Barua         j++;
1104ba6e06d0SAmlan Barua       }
1105ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1106ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1107ba6e06d0SAmlan Barua 
1108ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1109ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1111ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1112b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1113b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1114b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1115b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1116bd59e6c6SAmlan Barua #endif
11179496c9aeSAmlan Barua       break;
11185b3e41ffSAmlan Barua     }
1119e81bb599SAmlan Barua   }
11203564f093SHong Zhang   PetscFunctionReturn(0);
11215b3e41ffSAmlan Barua }
11225b3e41ffSAmlan Barua 
11235b3e41ffSAmlan Barua #undef __FUNCT__
11243c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11253c3a9c75SAmlan Barua /*
11263564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11273564f093SHong Zhang 
11283c3a9c75SAmlan Barua   Options Database Keys:
11293c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11303c3a9c75SAmlan Barua 
11313c3a9c75SAmlan Barua    Level: intermediate
11323c3a9c75SAmlan Barua 
11333c3a9c75SAmlan Barua */
11348cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1135b2b77a04SHong Zhang {
1136b2b77a04SHong Zhang   PetscErrorCode ierr;
1137ce94432eSBarry Smith   MPI_Comm       comm;
1138b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1139b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1140b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11415274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11425274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1143b2b77a04SHong Zhang   PetscBool      flg;
11444f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
114511902ff2SHong Zhang   PetscMPIInt    size,rank;
11469496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11479496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11489496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11499496c9aeSAmlan Barua   ptrdiff_t      temp;
11508ca4c034SAmlan Barua   PetscInt       N1,tot_dim;
11514f48bc67SAmlan Barua #else
11524f48bc67SAmlan Barua   PetscInt       n1;
11539496c9aeSAmlan Barua #endif
11549496c9aeSAmlan Barua 
1155b2b77a04SHong Zhang   PetscFunctionBegin;
1156ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1157b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
115811902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1159c92418ccSAmlan Barua 
11601878d478SAmlan Barua   fftw_mpi_init();
116111902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
116211902ff2SHong Zhang   pdim[0] = dim[0];
11638ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11648ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11658ca4c034SAmlan Barua #endif
11663564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11676882372aSHong Zhang     partial_dim *= dim[ctr];
116811902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11698ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1170db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1171db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11728ca4c034SAmlan Barua #endif
11736882372aSHong Zhang   }
11743c3a9c75SAmlan Barua 
1175b2b77a04SHong Zhang   if (size == 1) {
1176e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1177b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1178b2b77a04SHong Zhang     n    = N;
1179e81bb599SAmlan Barua #else
1180e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1181e81bb599SAmlan Barua     n    = tot_dim;
1182e81bb599SAmlan Barua #endif
1183e81bb599SAmlan Barua 
1184b2b77a04SHong Zhang   } else {
11857a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1186b2b77a04SHong Zhang     switch (ndim) {
1187b2b77a04SHong Zhang     case 1:
11883c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11893c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1190e5d7f247SAmlan Barua #else
11917a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
119226fbe8dcSKarl Rupp 
11936882372aSHong Zhang       n    = (PetscInt)local_n0;
11949496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11959496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1196e5d7f247SAmlan Barua #endif
1197b2b77a04SHong Zhang       break;
1198b2b77a04SHong Zhang     case 2:
11995b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12007a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1201b2b77a04SHong Zhang       /*
1202b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
12030ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1204b2b77a04SHong Zhang        */
1205b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1206b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12075b3e41ffSAmlan Barua #else
1208c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
120926fbe8dcSKarl Rupp 
12105b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1211c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12125b3e41ffSAmlan Barua #endif
1213b2b77a04SHong Zhang       break;
1214b2b77a04SHong Zhang     case 3:
121551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12167a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
121726fbe8dcSKarl Rupp 
12186882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12196882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
122051d1eed7SAmlan Barua #else
1221c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
122226fbe8dcSKarl Rupp 
122351d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1224c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
122551d1eed7SAmlan Barua #endif
1226b2b77a04SHong Zhang       break;
1227b2b77a04SHong Zhang     default:
1228b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12297a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
123026fbe8dcSKarl Rupp 
12316882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12326882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1233b3a17365SAmlan Barua #else
1234b3a17365SAmlan Barua       temp = pdim[ndim-1];
123526fbe8dcSKarl Rupp 
1236b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
123726fbe8dcSKarl Rupp 
1238c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
123926fbe8dcSKarl Rupp 
1240b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1241b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
124226fbe8dcSKarl Rupp 
1243b3a17365SAmlan Barua       pdim[ndim-1] = temp;
124426fbe8dcSKarl Rupp 
1245c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1246b3a17365SAmlan Barua #endif
1247b2b77a04SHong Zhang       break;
1248b2b77a04SHong Zhang     }
1249b2b77a04SHong Zhang   }
1250b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1251b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1252b2b77a04SHong Zhang   fft->data = (void*)fftw;
1253b2b77a04SHong Zhang 
1254b2b77a04SHong Zhang   fft->n            = n;
12550c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1256e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
125726fbe8dcSKarl Rupp 
12585e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12598c1d5ab3SHong Zhang   if (size == 1) {
1260a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1261a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1262a1b6d50cSHong Zhang #else
12638c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1264a1b6d50cSHong Zhang #endif
12658c1d5ab3SHong Zhang   }
126626fbe8dcSKarl Rupp 
1267b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1268c92418ccSAmlan Barua 
1269b2b77a04SHong Zhang   fftw->p_forward  = 0;
1270b2b77a04SHong Zhang   fftw->p_backward = 0;
1271b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1272b2b77a04SHong Zhang 
1273b2b77a04SHong Zhang   if (size == 1) {
1274b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1275b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1276b2b77a04SHong Zhang   } else {
1277b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1278b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1279b2b77a04SHong Zhang   }
1280b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1281b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12824a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
128326fbe8dcSKarl Rupp 
1284bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1285bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1286bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1287b2b77a04SHong Zhang 
1288b2b77a04SHong Zhang   /* get runtime options */
1289ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12905274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12915274ed1bSHong Zhang   if (flg) {
12925274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12935274ed1bSHong Zhang   }
12944a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1295b2b77a04SHong Zhang   PetscFunctionReturn(0);
1296b2b77a04SHong Zhang }
12973c3a9c75SAmlan Barua 
12983c3a9c75SAmlan Barua 
12993c3a9c75SAmlan Barua 
13003c3a9c75SAmlan Barua 
1301