xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision a1b6d50c236cc8b4c78decb1d66d5347aeec3d46)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15*a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16*a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18*a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt    partial_dim;
20b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32b2b77a04SHong Zhang 
3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3497504071SAmlan Barua    Input parameter:
353564f093SHong Zhang      A - the matrix
3697504071SAmlan Barua      x - the vector on which FDFT will be performed
3797504071SAmlan Barua 
3897504071SAmlan Barua    Output parameter:
3997504071SAmlan Barua      y - vector that stores result of FDFT
4097504071SAmlan Barua */
41b2b77a04SHong Zhang #undef __FUNCT__
42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44b2b77a04SHong Zhang {
45b2b77a04SHong Zhang   PetscErrorCode ierr;
46b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
49*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
50*a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
51*a1b6d50cSHong Zhang #else
528c1d5ab3SHong Zhang   fftw_iodim     *iodims;
53*a1b6d50cSHong Zhang #endif
54bb5bf6f6SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
55b2b77a04SHong Zhang 
56b2b77a04SHong Zhang   PetscFunctionBegin;
57b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
58b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
59b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
60b2b77a04SHong Zhang     switch (ndim) {
61b2b77a04SHong Zhang     case 1:
6258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
63b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6458a912c5SAmlan Barua #else
6558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6658a912c5SAmlan Barua #endif
67b2b77a04SHong Zhang       break;
68b2b77a04SHong Zhang     case 2:
6958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
70b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7158a912c5SAmlan Barua #else
7258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7358a912c5SAmlan Barua #endif
74b2b77a04SHong Zhang       break;
75b2b77a04SHong Zhang     case 3:
7658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
77b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7858a912c5SAmlan Barua #else
7958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8058a912c5SAmlan Barua #endif
81b2b77a04SHong Zhang       break;
82b2b77a04SHong Zhang     default:
8358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
84*a1b6d50cSHong Zhang       iodims = fftw->iodims;
85*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
868c1d5ab3SHong Zhang       if (ndim) {
87*a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
888c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
898c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
90*a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
918c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
928c1d5ab3SHong Zhang         }
938c1d5ab3SHong Zhang       }
94*a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
95*a1b6d50cSHong Zhang #else
96*a1b6d50cSHong Zhang       if (ndim) {
97*a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
98*a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
99*a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
100*a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
101*a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
102*a1b6d50cSHong Zhang         }
103*a1b6d50cSHong Zhang       }
104*a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
105*a1b6d50cSHong Zhang #endif
106*a1b6d50cSHong Zhang 
10758a912c5SAmlan Barua #else
10858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
10958a912c5SAmlan Barua #endif
110b2b77a04SHong Zhang       break;
111b2b77a04SHong Zhang     }
112b2b77a04SHong Zhang     fftw->finarray  = x_array;
113b2b77a04SHong Zhang     fftw->foutarray = y_array;
114b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
115b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
116b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
117b2b77a04SHong Zhang   } else { /* use existing plan */
118b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
119b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
120b2b77a04SHong Zhang     } else {
121b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
122b2b77a04SHong Zhang     }
123b2b77a04SHong Zhang   }
124b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
125b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
126b2b77a04SHong Zhang   PetscFunctionReturn(0);
127b2b77a04SHong Zhang }
128b2b77a04SHong Zhang 
12997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13097504071SAmlan Barua    Input parameter:
1313564f093SHong Zhang      A - the matrix
13297504071SAmlan Barua      x - the vector on which BDFT will be performed
13397504071SAmlan Barua 
13497504071SAmlan Barua    Output parameter:
13597504071SAmlan Barua      y - vector that stores result of BDFT
13697504071SAmlan Barua */
13797504071SAmlan Barua 
138b2b77a04SHong Zhang #undef __FUNCT__
139b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
140b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
141b2b77a04SHong Zhang {
142b2b77a04SHong Zhang   PetscErrorCode ierr;
143b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
144b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
145b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
146b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
147*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
148*a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
149*a1b6d50cSHong Zhang #else
150*a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
151*a1b6d50cSHong Zhang #endif
152b2b77a04SHong Zhang 
153b2b77a04SHong Zhang   PetscFunctionBegin;
154b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
155b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
156b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
157b2b77a04SHong Zhang     switch (ndim) {
158b2b77a04SHong Zhang     case 1:
15958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
160b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16158a912c5SAmlan Barua #else
162e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
16358a912c5SAmlan Barua #endif
164b2b77a04SHong Zhang       break;
165b2b77a04SHong Zhang     case 2:
16658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16858a912c5SAmlan Barua #else
169e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17058a912c5SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 3:
17358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17558a912c5SAmlan Barua #else
176e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17758a912c5SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     default:
18058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
182*a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
183*a1b6d50cSHong Zhang #else
1848c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
185*a1b6d50cSHong Zhang #endif
18658a912c5SAmlan Barua #else
187e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18858a912c5SAmlan Barua #endif
189b2b77a04SHong Zhang       break;
190b2b77a04SHong Zhang     }
191b2b77a04SHong Zhang     fftw->binarray  = x_array;
192b2b77a04SHong Zhang     fftw->boutarray = y_array;
193b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
194b2b77a04SHong Zhang   } else { /* use existing plan */
195b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
196b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
197b2b77a04SHong Zhang     } else {
198b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199b2b77a04SHong Zhang     }
200b2b77a04SHong Zhang   }
201b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
202b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
203b2b77a04SHong Zhang   PetscFunctionReturn(0);
204b2b77a04SHong Zhang }
205b2b77a04SHong Zhang 
20697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
20797504071SAmlan Barua    Input parameter:
2083564f093SHong Zhang      A - the matrix
20997504071SAmlan Barua      x - the vector on which FDFT will be performed
21097504071SAmlan Barua 
21197504071SAmlan Barua    Output parameter:
21297504071SAmlan Barua    y   - vector that stores result of FDFT
21397504071SAmlan Barua */
214b2b77a04SHong Zhang #undef __FUNCT__
215b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
216b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
217b2b77a04SHong Zhang {
218b2b77a04SHong Zhang   PetscErrorCode ierr;
219b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
220b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
221b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
222c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
223ce94432eSBarry Smith   MPI_Comm       comm;
224b2b77a04SHong Zhang 
225b2b77a04SHong Zhang   PetscFunctionBegin;
226ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
227b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
228b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
229b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
230b2b77a04SHong Zhang     switch (ndim) {
231b2b77a04SHong Zhang     case 1:
2323c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
233b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
234ae0a50aaSHong Zhang #else
2354f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2363c3a9c75SAmlan Barua #endif
237b2b77a04SHong Zhang       break;
238b2b77a04SHong Zhang     case 2:
23997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
240b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2413c3a9c75SAmlan Barua #else
2423c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2433c3a9c75SAmlan Barua #endif
244b2b77a04SHong Zhang       break;
245b2b77a04SHong Zhang     case 3:
2463c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
247b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2483c3a9c75SAmlan Barua #else
24951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2503c3a9c75SAmlan Barua #endif
251b2b77a04SHong Zhang       break;
252b2b77a04SHong Zhang     default:
2533c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
254c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2553c3a9c75SAmlan Barua #else
2563c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2573c3a9c75SAmlan Barua #endif
258b2b77a04SHong Zhang       break;
259b2b77a04SHong Zhang     }
260b2b77a04SHong Zhang     fftw->finarray  = x_array;
261b2b77a04SHong Zhang     fftw->foutarray = y_array;
262b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
263b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
264b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
265b2b77a04SHong Zhang   } else { /* use existing plan */
266b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
267b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
268b2b77a04SHong Zhang     } else {
269b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
270b2b77a04SHong Zhang     }
271b2b77a04SHong Zhang   }
272b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
273b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
274b2b77a04SHong Zhang   PetscFunctionReturn(0);
275b2b77a04SHong Zhang }
276b2b77a04SHong Zhang 
27797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
27897504071SAmlan Barua    Input parameter:
2793564f093SHong Zhang      A - the matrix
28097504071SAmlan Barua      x - the vector on which BDFT will be performed
28197504071SAmlan Barua 
28297504071SAmlan Barua    Output parameter:
28397504071SAmlan Barua      y - vector that stores result of BDFT
28497504071SAmlan Barua */
285b2b77a04SHong Zhang #undef __FUNCT__
286b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
287b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
288b2b77a04SHong Zhang {
289b2b77a04SHong Zhang   PetscErrorCode ierr;
290b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
291b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
292b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
293c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
294ce94432eSBarry Smith   MPI_Comm       comm;
295b2b77a04SHong Zhang 
296b2b77a04SHong Zhang   PetscFunctionBegin;
297ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
300b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
301b2b77a04SHong Zhang     switch (ndim) {
302b2b77a04SHong Zhang     case 1:
3033c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
304b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
305ae0a50aaSHong Zhang #else
3064f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3073c3a9c75SAmlan Barua #endif
308b2b77a04SHong Zhang       break;
309b2b77a04SHong Zhang     case 2:
31097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
311b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3123c3a9c75SAmlan Barua #else
3133c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3143c3a9c75SAmlan Barua #endif
315b2b77a04SHong Zhang       break;
316b2b77a04SHong Zhang     case 3:
3173c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
318b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3193c3a9c75SAmlan Barua #else
3203c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3213c3a9c75SAmlan Barua #endif
322b2b77a04SHong Zhang       break;
323b2b77a04SHong Zhang     default:
3243c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
325c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3263c3a9c75SAmlan Barua #else
3273c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3283c3a9c75SAmlan Barua #endif
329b2b77a04SHong Zhang       break;
330b2b77a04SHong Zhang     }
331b2b77a04SHong Zhang     fftw->binarray  = x_array;
332b2b77a04SHong Zhang     fftw->boutarray = y_array;
333b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
334b2b77a04SHong Zhang   } else { /* use existing plan */
335b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
336b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
337b2b77a04SHong Zhang     } else {
338b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339b2b77a04SHong Zhang     }
340b2b77a04SHong Zhang   }
341b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
342b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
343b2b77a04SHong Zhang   PetscFunctionReturn(0);
344b2b77a04SHong Zhang }
345b2b77a04SHong Zhang 
346b2b77a04SHong Zhang #undef __FUNCT__
347b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
348b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
349b2b77a04SHong Zhang {
350b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
351b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
352b2b77a04SHong Zhang   PetscErrorCode ierr;
353b2b77a04SHong Zhang 
354b2b77a04SHong Zhang   PetscFunctionBegin;
355b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
356b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3578c1d5ab3SHong Zhang   if (fftw->iodims) {
3588c1d5ab3SHong Zhang     free(fftw->iodims);
3598c1d5ab3SHong Zhang   }
360bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
361bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3626ccf0b3eSHong Zhang   fftw_mpi_cleanup();
363b2b77a04SHong Zhang   PetscFunctionReturn(0);
364b2b77a04SHong Zhang }
365b2b77a04SHong Zhang 
366c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
367b2b77a04SHong Zhang #undef __FUNCT__
368b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
369b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
370b2b77a04SHong Zhang {
371b2b77a04SHong Zhang   PetscErrorCode ierr;
372b2b77a04SHong Zhang   PetscScalar    *array;
373b2b77a04SHong Zhang 
374b2b77a04SHong Zhang   PetscFunctionBegin;
375b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
376b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
377b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
378b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
379b2b77a04SHong Zhang   PetscFunctionReturn(0);
380b2b77a04SHong Zhang }
381b2b77a04SHong Zhang 
3824f7415efSAmlan Barua #undef __FUNCT__
3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3844be45526SHong Zhang /*@
385b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3864f7415efSAmlan Barua      parallel layout determined by FFTW
3874f7415efSAmlan Barua 
3884f7415efSAmlan Barua    Collective on Mat
3894f7415efSAmlan Barua 
3904f7415efSAmlan Barua    Input Parameter:
3913564f093SHong Zhang .   A - the matrix
3924f7415efSAmlan Barua 
3934f7415efSAmlan Barua    Output Parameter:
394cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
395cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
396cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3974f7415efSAmlan Barua 
3984f7415efSAmlan Barua   Level: advanced
3993564f093SHong Zhang 
40097504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
40197504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
40297504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
40397504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
40497504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
40597504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
406e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
407e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
408e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
409e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
410e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
411e0ec536eSAmlan Barua         each processor and returns that.
4124f7415efSAmlan Barua 
4134f7415efSAmlan Barua .seealso: MatCreateFFTW()
4144be45526SHong Zhang @*/
4154be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4164be45526SHong Zhang {
4174be45526SHong Zhang   PetscErrorCode ierr;
418b4c3921fSHong Zhang 
4194be45526SHong Zhang   PetscFunctionBegin;
4204be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4214be45526SHong Zhang   PetscFunctionReturn(0);
4224be45526SHong Zhang }
4234be45526SHong Zhang 
4244be45526SHong Zhang #undef __FUNCT__
4254be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4264be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4274f7415efSAmlan Barua {
4284f7415efSAmlan Barua   PetscErrorCode ierr;
4294f7415efSAmlan Barua   PetscMPIInt    size,rank;
430ce94432eSBarry Smith   MPI_Comm       comm;
4314f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4324f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4339496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4344f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4354f7415efSAmlan Barua 
4364f7415efSAmlan Barua   PetscFunctionBegin;
4374f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4384f7415efSAmlan Barua   PetscValidType(A,1);
439ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4404f7415efSAmlan Barua 
4414f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4424f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4434f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4444f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4454f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4464f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4474f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4484f7415efSAmlan Barua #else
4498ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4508ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4518ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4524f7415efSAmlan Barua #endif
45397504071SAmlan Barua   } else { /* parallel cases */
4544f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4559496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4569496c9aeSAmlan Barua     fftw_complex *data_fout;
4579496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4589496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4599496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4609496c9aeSAmlan Barua #else
4614f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4626ccf0b3eSHong Zhang     PetscInt  n1,N1;
4639496c9aeSAmlan Barua     ptrdiff_t temp;
4649496c9aeSAmlan Barua #endif
4659496c9aeSAmlan Barua 
4664f7415efSAmlan Barua     switch (ndim) {
4674f7415efSAmlan Barua     case 1:
46857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4694f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
47057625b84SAmlan Barua #else
47157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
47257625b84SAmlan Barua       if (fin) {
47357625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
474778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
47526fbe8dcSKarl Rupp 
47657625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
47757625b84SAmlan Barua       }
47857625b84SAmlan Barua       if (fout) {
47957625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
480778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48126fbe8dcSKarl Rupp 
48257625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
48357625b84SAmlan Barua       }
48457625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
48557625b84SAmlan Barua       if (bout) {
48657625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
487778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
48826fbe8dcSKarl Rupp 
48957625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
49057625b84SAmlan Barua       }
49157625b84SAmlan Barua       break;
49257625b84SAmlan Barua #endif
4934f7415efSAmlan Barua     case 2:
49497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4954f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4964f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4974f7415efSAmlan Barua       if (fin) {
4984f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
499778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
50026fbe8dcSKarl Rupp 
5014f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5024f7415efSAmlan Barua       }
5034f7415efSAmlan Barua       if (bout) {
5044f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
505778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
50626fbe8dcSKarl Rupp 
5074f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5084f7415efSAmlan Barua       }
50957625b84SAmlan Barua       if (fout) {
51057625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51226fbe8dcSKarl Rupp 
51357625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
51457625b84SAmlan Barua       }
5154f7415efSAmlan Barua #else
5164f7415efSAmlan Barua       /* Get local size */
5174f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5184f7415efSAmlan Barua       if (fin) {
5194f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
520778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
52126fbe8dcSKarl Rupp 
5224f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5234f7415efSAmlan Barua       }
5244f7415efSAmlan Barua       if (fout) {
5254f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52726fbe8dcSKarl Rupp 
5284f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5294f7415efSAmlan Barua       }
5304f7415efSAmlan Barua       if (bout) {
5314f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
532778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
53326fbe8dcSKarl Rupp 
5344f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5354f7415efSAmlan Barua       }
5364f7415efSAmlan Barua #endif
5374f7415efSAmlan Barua       break;
5384f7415efSAmlan Barua     case 3:
5394f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5404f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5414f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5424f7415efSAmlan Barua       if (fin) {
5434f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
544778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
54526fbe8dcSKarl Rupp 
5464f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5474f7415efSAmlan Barua       }
5484f7415efSAmlan Barua       if (bout) {
5494f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
550778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
55126fbe8dcSKarl Rupp 
5524f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5534f7415efSAmlan Barua       }
5543564f093SHong Zhang 
55557625b84SAmlan Barua       if (fout) {
55657625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
557778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55826fbe8dcSKarl Rupp 
55957625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56057625b84SAmlan Barua       }
5614f7415efSAmlan Barua #else
5620c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5630c9b18e4SAmlan Barua       if (fin) {
5640c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
565778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
56626fbe8dcSKarl Rupp 
5670c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5680c9b18e4SAmlan Barua       }
5690c9b18e4SAmlan Barua       if (fout) {
5700c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
571778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57226fbe8dcSKarl Rupp 
5730c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5740c9b18e4SAmlan Barua       }
5750c9b18e4SAmlan Barua       if (bout) {
5760c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
57826fbe8dcSKarl Rupp 
5790c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5800c9b18e4SAmlan Barua       }
5814f7415efSAmlan Barua #endif
5824f7415efSAmlan Barua       break;
5834f7415efSAmlan Barua     default:
5844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5854f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
58626fbe8dcSKarl Rupp 
5874f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
58826fbe8dcSKarl Rupp 
5894f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5904f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
59126fbe8dcSKarl Rupp 
5924f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5934f7415efSAmlan Barua 
5944f7415efSAmlan Barua       if (fin) {
5954f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
596778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
59726fbe8dcSKarl Rupp 
5984f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5994f7415efSAmlan Barua       }
6004f7415efSAmlan Barua       if (bout) {
6014f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
602778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
60326fbe8dcSKarl Rupp 
6049496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6054f7415efSAmlan Barua       }
6063564f093SHong Zhang 
60757625b84SAmlan Barua       if (fout) {
60857625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
609778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
61026fbe8dcSKarl Rupp 
61157625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
61257625b84SAmlan Barua       }
6134f7415efSAmlan Barua #else
6140c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6150c9b18e4SAmlan Barua       if (fin) {
6160c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
617778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
61826fbe8dcSKarl Rupp 
6190c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6200c9b18e4SAmlan Barua       }
6210c9b18e4SAmlan Barua       if (fout) {
6220c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
623778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
62426fbe8dcSKarl Rupp 
6250c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6260c9b18e4SAmlan Barua       }
6270c9b18e4SAmlan Barua       if (bout) {
6280c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
629778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
63026fbe8dcSKarl Rupp 
6310c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6320c9b18e4SAmlan Barua       }
6334f7415efSAmlan Barua #endif
6344f7415efSAmlan Barua       break;
6354f7415efSAmlan Barua     }
6364f7415efSAmlan Barua   }
6374f7415efSAmlan Barua   PetscFunctionReturn(0);
6384f7415efSAmlan Barua }
6394f7415efSAmlan Barua 
640c92418ccSAmlan Barua #undef __FUNCT__
64174a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6423564f093SHong Zhang /*@
6433564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
64454efbe56SHong Zhang 
6453564f093SHong Zhang    Collective on Mat
6463564f093SHong Zhang 
6473564f093SHong Zhang    Input Parameters:
6483564f093SHong Zhang +  A - FFTW matrix
6493564f093SHong Zhang -  x - the PETSc vector
6503564f093SHong Zhang 
6513564f093SHong Zhang    Output Parameters:
6523564f093SHong Zhang .  y - the FFTW vector
6533564f093SHong Zhang 
654b2b77a04SHong Zhang   Options Database Keys:
6553564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
656b2b77a04SHong Zhang 
657b2b77a04SHong Zhang    Level: intermediate
6583564f093SHong Zhang 
65997504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
66097504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
66197504071SAmlan Barua          zeros. This routine does that job by scattering operation.
66297504071SAmlan Barua 
6633564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6643564f093SHong Zhang @*/
6653564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6663564f093SHong Zhang {
6673564f093SHong Zhang   PetscErrorCode ierr;
668b2b77a04SHong Zhang 
6693564f093SHong Zhang   PetscFunctionBegin;
6703564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6713564f093SHong Zhang   PetscFunctionReturn(0);
6723564f093SHong Zhang }
6733c3a9c75SAmlan Barua 
6743c3a9c75SAmlan Barua #undef __FUNCT__
6751986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
67674a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6773c3a9c75SAmlan Barua {
6783c3a9c75SAmlan Barua   PetscErrorCode ierr;
679ce94432eSBarry Smith   MPI_Comm       comm;
6803c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6813c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6829496c9aeSAmlan Barua   PetscInt       N     =fft->N;
683b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6849496c9aeSAmlan Barua   PetscInt       low;
6857a21806cSHong Zhang   PetscMPIInt    rank,size;
6867a21806cSHong Zhang   PetscInt       vsize,vsize1;
6877a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6889496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6893c3a9c75SAmlan Barua   VecScatter     vecscat;
6903c3a9c75SAmlan Barua   IS             list1,list2;
6919496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6929496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6939496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6949496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6959496c9aeSAmlan Barua   ptrdiff_t      temp;
6969496c9aeSAmlan Barua #endif
6973c3a9c75SAmlan Barua 
6983564f093SHong Zhang   PetscFunctionBegin;
699ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
700f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
701f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
7020298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
7033c3a9c75SAmlan Barua 
7043564f093SHong Zhang   if (size==1) {
7058ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7068ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7079496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7086971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7096971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7106971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7116971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
712b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7133564f093SHong Zhang   } else {
7143c3a9c75SAmlan Barua     switch (ndim) {
7153c3a9c75SAmlan Barua     case 1:
71664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7177a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
71826fbe8dcSKarl Rupp 
7194f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7204f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
72164657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
72264657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72364657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72464657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
72564657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
72664657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
72764657d84SAmlan Barua #else
728e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
72964657d84SAmlan Barua #endif
7303c3a9c75SAmlan Barua       break;
7313c3a9c75SAmlan Barua     case 2:
732bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7337a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
73426fbe8dcSKarl Rupp 
7354f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7364f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
737bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
738bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
739bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
741bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
742bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
743bd59e6c6SAmlan Barua #else
744c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
74526fbe8dcSKarl Rupp 
7463c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7479496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7489496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7493c3a9c75SAmlan Barua 
7503564f093SHong Zhang       if (dim[1]%2==0) {
7513c3a9c75SAmlan Barua         NM = dim[1]+2;
7523564f093SHong Zhang       } else {
7533c3a9c75SAmlan Barua         NM = dim[1]+1;
7543564f093SHong Zhang       }
7553c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7563c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7573c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7583c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
75926fbe8dcSKarl Rupp 
7605b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7613c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7623c3a9c75SAmlan Barua         }
7633c3a9c75SAmlan Barua       }
7643c3a9c75SAmlan Barua 
7653c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7663c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7673c3a9c75SAmlan Barua 
768f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
769f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
770f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
771f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
772b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
773b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
774b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
775b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
776bd59e6c6SAmlan Barua #endif
7779496c9aeSAmlan Barua       break;
7783c3a9c75SAmlan Barua 
7793c3a9c75SAmlan Barua     case 3:
780bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7817a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
78226fbe8dcSKarl Rupp 
7834f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7844f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
785bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
786bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
787bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
788bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
789bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
790bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
791bd59e6c6SAmlan Barua #else
7927a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
79326fbe8dcSKarl Rupp 
79451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
79551d1eed7SAmlan Barua 
7969496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7979496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
79851d1eed7SAmlan Barua 
799db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
800db4deed7SKarl Rupp       else             NM = dim[2]+1;
80151d1eed7SAmlan Barua 
80251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
80351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
80451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
80551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
80651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
80726fbe8dcSKarl Rupp 
80851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
80951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
81051d1eed7SAmlan Barua           }
81151d1eed7SAmlan Barua         }
81251d1eed7SAmlan Barua       }
81351d1eed7SAmlan Barua 
81451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
81551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
81651d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
81751d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81851d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81951d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
820b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
821b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
822b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
823b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
824bd59e6c6SAmlan Barua #endif
8259496c9aeSAmlan Barua       break;
8263c3a9c75SAmlan Barua 
8273c3a9c75SAmlan Barua     default:
828bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8297a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
83026fbe8dcSKarl Rupp 
8314f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8324f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
833bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
834bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
837bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
838bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
839bd59e6c6SAmlan Barua #else
840e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
84126fbe8dcSKarl Rupp 
842e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
84326fbe8dcSKarl Rupp 
8447a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
84526fbe8dcSKarl Rupp 
846e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
84726fbe8dcSKarl Rupp 
848e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
849e5d7f247SAmlan Barua 
850e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
851e5d7f247SAmlan Barua 
852e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
853e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
854e5d7f247SAmlan Barua 
855db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
856db4deed7SKarl Rupp       else                  NM = 1;
857e5d7f247SAmlan Barua 
8586971673cSAmlan Barua       j = low;
8593564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8606971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8616971673cSAmlan Barua         indx2[i] = j;
86226fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8636971673cSAmlan Barua         j++;
8646971673cSAmlan Barua       }
8656971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8666971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8676971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8686971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8696971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8706971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
871b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
872b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
873b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
874b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
875bd59e6c6SAmlan Barua #endif
8769496c9aeSAmlan Barua       break;
8773c3a9c75SAmlan Barua     }
878e81bb599SAmlan Barua   }
8793564f093SHong Zhang   PetscFunctionReturn(0);
8803c3a9c75SAmlan Barua }
8813c3a9c75SAmlan Barua 
8823c3a9c75SAmlan Barua #undef __FUNCT__
88374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8843564f093SHong Zhang /*@
8853564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8863564f093SHong Zhang 
8873564f093SHong Zhang    Collective on Mat
8883564f093SHong Zhang 
8893564f093SHong Zhang     Input Parameters:
8903564f093SHong Zhang +   A - FFTW matrix
8913564f093SHong Zhang -   x - FFTW vector
8923564f093SHong Zhang 
8933564f093SHong Zhang    Output Parameters:
8943564f093SHong Zhang .  y - PETSc vector
8953564f093SHong Zhang 
8963564f093SHong Zhang    Level: intermediate
8973564f093SHong Zhang 
8983564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8993564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9003564f093SHong Zhang 
9013564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9023564f093SHong Zhang @*/
90374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9043c3a9c75SAmlan Barua {
9053c3a9c75SAmlan Barua   PetscErrorCode ierr;
9065fd66863SKarl Rupp 
9073c3a9c75SAmlan Barua   PetscFunctionBegin;
90874a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9093c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9103c3a9c75SAmlan Barua }
91154efbe56SHong Zhang 
9123c3a9c75SAmlan Barua #undef __FUNCT__
91374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
91474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9155b3e41ffSAmlan Barua {
9165b3e41ffSAmlan Barua   PetscErrorCode ierr;
917ce94432eSBarry Smith   MPI_Comm       comm;
9185b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9195b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9209496c9aeSAmlan Barua   PetscInt       N     = fft->N;
921b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9229496c9aeSAmlan Barua   PetscInt       low;
9237a21806cSHong Zhang   PetscMPIInt    rank,size;
9247a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9259496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9265b3e41ffSAmlan Barua   VecScatter     vecscat;
9275b3e41ffSAmlan Barua   IS             list1,list2;
9289496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9299496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
9309496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
9319496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
9329496c9aeSAmlan Barua   ptrdiff_t temp;
9339496c9aeSAmlan Barua #endif
9349496c9aeSAmlan Barua 
9353564f093SHong Zhang   PetscFunctionBegin;
936ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9375b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9385b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9390298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9405b3e41ffSAmlan Barua 
941e81bb599SAmlan Barua   if (size==1) {
942b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9436971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9446971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9456971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9466971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
947b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
948e81bb599SAmlan Barua 
9493564f093SHong Zhang   } else {
950e81bb599SAmlan Barua     switch (ndim) {
951e81bb599SAmlan Barua     case 1:
95264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9537a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
95426fbe8dcSKarl Rupp 
9554f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9564f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
95764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
95864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
96064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
96164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
96264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
96364657d84SAmlan Barua #else
9646a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
96564657d84SAmlan Barua #endif
9665b3e41ffSAmlan Barua       break;
9675b3e41ffSAmlan Barua     case 2:
968bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9697a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
97026fbe8dcSKarl Rupp 
9714f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9724f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
973bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
974bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979bd59e6c6SAmlan Barua #else
9807a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
98126fbe8dcSKarl Rupp 
9825b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9835b3e41ffSAmlan Barua 
9849496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9859496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9865b3e41ffSAmlan Barua 
987db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
988db4deed7SKarl Rupp       else             NM = dim[1]+1;
9895b3e41ffSAmlan Barua 
9905b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9915b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9925b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9935b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
99426fbe8dcSKarl Rupp 
9955b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9965b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9975b3e41ffSAmlan Barua         }
9985b3e41ffSAmlan Barua       }
9995b3e41ffSAmlan Barua 
10005b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10015b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10025b3e41ffSAmlan Barua 
10035b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10045b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10055b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10065b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1007b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1008b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1009b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1010b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1011bd59e6c6SAmlan Barua #endif
10129496c9aeSAmlan Barua       break;
10135b3e41ffSAmlan Barua     case 3:
1014bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10157a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
101626fbe8dcSKarl Rupp 
10174f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10184f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1019bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1020bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1023bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1024bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua #else
1026bd59e6c6SAmlan Barua 
10277a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
102826fbe8dcSKarl Rupp 
102951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
103051d1eed7SAmlan Barua 
10319496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10329496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
103351d1eed7SAmlan Barua 
1034db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1035db4deed7SKarl Rupp       else             NM = dim[2]+1;
103651d1eed7SAmlan Barua 
103751d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
103851d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
103951d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
104051d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
104151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
104226fbe8dcSKarl Rupp 
104351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
104451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
104551d1eed7SAmlan Barua           }
104651d1eed7SAmlan Barua         }
104751d1eed7SAmlan Barua       }
104851d1eed7SAmlan Barua 
104951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
105051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
105151d1eed7SAmlan Barua 
105251d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
105351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1059b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1060bd59e6c6SAmlan Barua #endif
10619496c9aeSAmlan Barua       break;
10625b3e41ffSAmlan Barua     default:
1063bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10647a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
106526fbe8dcSKarl Rupp 
10664f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10674f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1068bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1069bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1072bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1073bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1074bd59e6c6SAmlan Barua #else
1075ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
107626fbe8dcSKarl Rupp 
1077ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
107826fbe8dcSKarl Rupp 
1079c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
108026fbe8dcSKarl Rupp 
1081ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
108226fbe8dcSKarl Rupp 
1083ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1084ba6e06d0SAmlan Barua 
1085ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1086ba6e06d0SAmlan Barua 
1087ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1088ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1089ba6e06d0SAmlan Barua 
1090db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1091db4deed7SKarl Rupp       else                  NM = 1;
1092ba6e06d0SAmlan Barua 
1093ba6e06d0SAmlan Barua       j = low;
10943564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1095ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1096ba6e06d0SAmlan Barua         indx2[i] = j;
10973564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1098ba6e06d0SAmlan Barua         j++;
1099ba6e06d0SAmlan Barua       }
1100ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1101ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1102ba6e06d0SAmlan Barua 
1103ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1104ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1106ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1107b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1108b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1109b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1110b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1111bd59e6c6SAmlan Barua #endif
11129496c9aeSAmlan Barua       break;
11135b3e41ffSAmlan Barua     }
1114e81bb599SAmlan Barua   }
11153564f093SHong Zhang   PetscFunctionReturn(0);
11165b3e41ffSAmlan Barua }
11175b3e41ffSAmlan Barua 
11185b3e41ffSAmlan Barua #undef __FUNCT__
11193c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11203c3a9c75SAmlan Barua /*
11213564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11223564f093SHong Zhang 
11233c3a9c75SAmlan Barua   Options Database Keys:
11243c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11253c3a9c75SAmlan Barua 
11263c3a9c75SAmlan Barua    Level: intermediate
11273c3a9c75SAmlan Barua 
11283c3a9c75SAmlan Barua */
11298cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1130b2b77a04SHong Zhang {
1131b2b77a04SHong Zhang   PetscErrorCode ierr;
1132ce94432eSBarry Smith   MPI_Comm       comm;
1133b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1134b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1135b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11365274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11375274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1138b2b77a04SHong Zhang   PetscBool      flg;
11394f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
114011902ff2SHong Zhang   PetscMPIInt    size,rank;
11419496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11429496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11439496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11449496c9aeSAmlan Barua   ptrdiff_t temp;
11458ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11464f48bc67SAmlan Barua #else
11474f48bc67SAmlan Barua   PetscInt n1;
11489496c9aeSAmlan Barua #endif
11499496c9aeSAmlan Barua 
1150b2b77a04SHong Zhang   PetscFunctionBegin;
1151ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1152b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
115311902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1154c92418ccSAmlan Barua 
11551878d478SAmlan Barua   fftw_mpi_init();
115611902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
115711902ff2SHong Zhang   pdim[0] = dim[0];
11588ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11598ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11608ca4c034SAmlan Barua #endif
11613564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11626882372aSHong Zhang     partial_dim *= dim[ctr];
116311902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11648ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1165db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1166db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11678ca4c034SAmlan Barua #endif
11686882372aSHong Zhang   }
11693c3a9c75SAmlan Barua 
1170b2b77a04SHong Zhang   if (size == 1) {
1171e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1172b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1173b2b77a04SHong Zhang     n    = N;
1174e81bb599SAmlan Barua #else
1175e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1176e81bb599SAmlan Barua     n    = tot_dim;
1177e81bb599SAmlan Barua #endif
1178e81bb599SAmlan Barua 
1179b2b77a04SHong Zhang   } else {
11807a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1181b2b77a04SHong Zhang     switch (ndim) {
1182b2b77a04SHong Zhang     case 1:
11833c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11843c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1185e5d7f247SAmlan Barua #else
11867a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
118726fbe8dcSKarl Rupp 
11886882372aSHong Zhang       n    = (PetscInt)local_n0;
11899496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11909496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1191e5d7f247SAmlan Barua #endif
1192b2b77a04SHong Zhang       break;
1193b2b77a04SHong Zhang     case 2:
11945b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
11957a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1196b2b77a04SHong Zhang       /*
1197b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11980ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1199b2b77a04SHong Zhang        */
1200b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1201b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12025b3e41ffSAmlan Barua #else
1203c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
120426fbe8dcSKarl Rupp 
12055b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1206c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12075b3e41ffSAmlan Barua #endif
1208b2b77a04SHong Zhang       break;
1209b2b77a04SHong Zhang     case 3:
121051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12117a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
121226fbe8dcSKarl Rupp 
12136882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12146882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
121551d1eed7SAmlan Barua #else
1216c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
121726fbe8dcSKarl Rupp 
121851d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1219c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
122051d1eed7SAmlan Barua #endif
1221b2b77a04SHong Zhang       break;
1222b2b77a04SHong Zhang     default:
1223b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12247a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
122526fbe8dcSKarl Rupp 
12266882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12276882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1228b3a17365SAmlan Barua #else
1229b3a17365SAmlan Barua       temp = pdim[ndim-1];
123026fbe8dcSKarl Rupp 
1231b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
123226fbe8dcSKarl Rupp 
1233c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
123426fbe8dcSKarl Rupp 
1235b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1236b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
123726fbe8dcSKarl Rupp 
1238b3a17365SAmlan Barua       pdim[ndim-1] = temp;
123926fbe8dcSKarl Rupp 
1240c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1241b3a17365SAmlan Barua #endif
1242b2b77a04SHong Zhang       break;
1243b2b77a04SHong Zhang     }
1244b2b77a04SHong Zhang   }
1245b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1246b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1247b2b77a04SHong Zhang   fft->data = (void*)fftw;
1248b2b77a04SHong Zhang 
1249b2b77a04SHong Zhang   fft->n            = n;
12500c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1251e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
125226fbe8dcSKarl Rupp 
12535e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12548c1d5ab3SHong Zhang   if (size == 1) {
1255*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1256*a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1257*a1b6d50cSHong Zhang #else
12588c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1259*a1b6d50cSHong Zhang #endif
12608c1d5ab3SHong Zhang   }
126126fbe8dcSKarl Rupp 
1262b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1263c92418ccSAmlan Barua 
1264b2b77a04SHong Zhang   fftw->p_forward  = 0;
1265b2b77a04SHong Zhang   fftw->p_backward = 0;
1266b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1267b2b77a04SHong Zhang 
1268b2b77a04SHong Zhang   if (size == 1) {
1269b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1270b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1271b2b77a04SHong Zhang   } else {
1272b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1273b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1274b2b77a04SHong Zhang   }
1275b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1276b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12774a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
127826fbe8dcSKarl Rupp 
1279bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1280bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1281bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1282b2b77a04SHong Zhang 
1283b2b77a04SHong Zhang   /* get runtime options */
1284ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12855274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12865274ed1bSHong Zhang   if (flg) {
12875274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12885274ed1bSHong Zhang   }
12894a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1290b2b77a04SHong Zhang   PetscFunctionReturn(0);
1291b2b77a04SHong Zhang }
12923c3a9c75SAmlan Barua 
12933c3a9c75SAmlan Barua 
12943c3a9c75SAmlan Barua 
12953c3a9c75SAmlan Barua 
1296