xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 8c1d5ab369abe91f47a3d8806b1b424684d40086)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14*8c1d5ab3SHong Zhang   fftw_iodim  *iodims;
15e5d7f247SAmlan Barua   PetscInt    partial_dim;
16b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
17b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
18b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
19b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
20b2b77a04SHong Zhang } Mat_FFTW;
21b2b77a04SHong Zhang 
22b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
26b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
27b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
28b2b77a04SHong Zhang 
2997504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3097504071SAmlan Barua    Input parameter:
313564f093SHong Zhang      A - the matrix
3297504071SAmlan Barua      x - the vector on which FDFT will be performed
3397504071SAmlan Barua 
3497504071SAmlan Barua    Output parameter:
3597504071SAmlan Barua      y - vector that stores result of FDFT
3697504071SAmlan Barua */
37b2b77a04SHong Zhang #undef __FUNCT__
38b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
39b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
40b2b77a04SHong Zhang {
41b2b77a04SHong Zhang   PetscErrorCode ierr;
42b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
43b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
44b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
45*8c1d5ab3SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
46*8c1d5ab3SHong Zhang   //int            *dim_32,i;
47*8c1d5ab3SHong Zhang   fftw_iodim     *iodims;
48b2b77a04SHong Zhang 
49b2b77a04SHong Zhang   PetscFunctionBegin;
50*8c1d5ab3SHong Zhang   //ierr = PetscMalloc(ndim*sizeof(int),&dim_32);CHKERRQ(ierr);
51*8c1d5ab3SHong Zhang   //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
527a21806cSHong Zhang 
53b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
54b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
55b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
56b2b77a04SHong Zhang     switch (ndim) {
57b2b77a04SHong Zhang     case 1:
5858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
59b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6058a912c5SAmlan Barua #else
6158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6258a912c5SAmlan Barua #endif
63b2b77a04SHong Zhang       break;
64b2b77a04SHong Zhang     case 2:
6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
66b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6758a912c5SAmlan Barua #else
6858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6958a912c5SAmlan Barua #endif
70b2b77a04SHong Zhang       break;
71b2b77a04SHong Zhang     case 3:
7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
73b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
747a21806cSHong Zhang       //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     default:
8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
81*8c1d5ab3SHong Zhang       //fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
825274ed1bSHong Zhang       /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */
83*8c1d5ab3SHong Zhang 
84*8c1d5ab3SHong Zhang       iodims = (fftw_iodim*)fftw->iodims;
85*8c1d5ab3SHong Zhang       //ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&iodims);CHKERRQ(ierr);
86*8c1d5ab3SHong Zhang       if (ndim) {
87*8c1d5ab3SHong Zhang         iodims[ndim-1].n = dim[ndim-1];
88*8c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
89*8c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
90*8c1d5ab3SHong Zhang           iodims[i].n = dim[i];
91*8c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
92*8c1d5ab3SHong Zhang         }
93*8c1d5ab3SHong Zhang       }
94*8c1d5ab3SHong Zhang 
95*8c1d5ab3SHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
96*8c1d5ab3SHong Zhang 
9758a912c5SAmlan Barua #else
9858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
9958a912c5SAmlan Barua #endif
100b2b77a04SHong Zhang       break;
101b2b77a04SHong Zhang     }
102b2b77a04SHong Zhang     fftw->finarray  = x_array;
103b2b77a04SHong Zhang     fftw->foutarray = y_array;
104b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
105b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
106b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
107b2b77a04SHong Zhang   } else { /* use existing plan */
108b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
109b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
110b2b77a04SHong Zhang     } else {
111b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
112b2b77a04SHong Zhang     }
113b2b77a04SHong Zhang   }
114b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
115b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
116*8c1d5ab3SHong Zhang   //ierr = PetscFree(dim_32);CHKERRQ(ierr);
117b2b77a04SHong Zhang   PetscFunctionReturn(0);
118b2b77a04SHong Zhang }
119b2b77a04SHong Zhang 
12097504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
12197504071SAmlan Barua    Input parameter:
1223564f093SHong Zhang      A - the matrix
12397504071SAmlan Barua      x - the vector on which BDFT will be performed
12497504071SAmlan Barua 
12597504071SAmlan Barua    Output parameter:
12697504071SAmlan Barua      y - vector that stores result of BDFT
12797504071SAmlan Barua */
12897504071SAmlan Barua 
129b2b77a04SHong Zhang #undef __FUNCT__
130b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
131b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
132b2b77a04SHong Zhang {
133b2b77a04SHong Zhang   PetscErrorCode ierr;
134b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
135b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
136b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
137b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
138*8c1d5ab3SHong Zhang   fftw_iodim     *iodims=(fftw_iodim*)fftw->iodims;
139*8c1d5ab3SHong Zhang   //int            *dim_32,i; /* for 64-bit */
140b2b77a04SHong Zhang 
141b2b77a04SHong Zhang   PetscFunctionBegin;
142*8c1d5ab3SHong Zhang   //ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
143*8c1d5ab3SHong Zhang   //for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
1447a21806cSHong Zhang 
145b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
147b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
148b2b77a04SHong Zhang     switch (ndim) {
149b2b77a04SHong Zhang     case 1:
15058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
151b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
15258a912c5SAmlan Barua #else
153e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
15458a912c5SAmlan Barua #endif
155b2b77a04SHong Zhang       break;
156b2b77a04SHong Zhang     case 2:
15758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
158b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
15958a912c5SAmlan Barua #else
160e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
16158a912c5SAmlan Barua #endif
162b2b77a04SHong Zhang       break;
163b2b77a04SHong Zhang     case 3:
16458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
165b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
1667a21806cSHong Zhang       //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE);
16758a912c5SAmlan Barua #else
168e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
16958a912c5SAmlan Barua #endif
170b2b77a04SHong Zhang       break;
171b2b77a04SHong Zhang     default:
17258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
173*8c1d5ab3SHong Zhang       //fftw->p_backward = fftw_plan_dft((int)ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
174*8c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17558a912c5SAmlan Barua #else
176e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17758a912c5SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     }
180b2b77a04SHong Zhang     fftw->binarray  = x_array;
181b2b77a04SHong Zhang     fftw->boutarray = y_array;
182b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
183b2b77a04SHong Zhang   } else { /* use existing plan */
184b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
185b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
186b2b77a04SHong Zhang     } else {
187b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
188b2b77a04SHong Zhang     }
189b2b77a04SHong Zhang   }
190b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
191b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
192*8c1d5ab3SHong Zhang   //ierr = PetscFree(dim_32);CHKERRQ(ierr);
193b2b77a04SHong Zhang   PetscFunctionReturn(0);
194b2b77a04SHong Zhang }
195b2b77a04SHong Zhang 
19697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
19797504071SAmlan Barua    Input parameter:
1983564f093SHong Zhang      A - the matrix
19997504071SAmlan Barua      x - the vector on which FDFT will be performed
20097504071SAmlan Barua 
20197504071SAmlan Barua    Output parameter:
20297504071SAmlan Barua    y   - vector that stores result of FDFT
20397504071SAmlan Barua */
204b2b77a04SHong Zhang #undef __FUNCT__
205b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
206b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
207b2b77a04SHong Zhang {
208b2b77a04SHong Zhang   PetscErrorCode ierr;
209b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
210b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
211b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
212c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
213ce94432eSBarry Smith   MPI_Comm       comm;
214b2b77a04SHong Zhang 
215b2b77a04SHong Zhang   PetscFunctionBegin;
216ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
217b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
218b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
219b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
220b2b77a04SHong Zhang     switch (ndim) {
221b2b77a04SHong Zhang     case 1:
2223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
223b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
224ae0a50aaSHong Zhang #else
2254f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2263c3a9c75SAmlan Barua #endif
227b2b77a04SHong Zhang       break;
228b2b77a04SHong Zhang     case 2:
22997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
230b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2313c3a9c75SAmlan Barua #else
2323c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2333c3a9c75SAmlan Barua #endif
234b2b77a04SHong Zhang       break;
235b2b77a04SHong Zhang     case 3:
2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
237b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2383c3a9c75SAmlan Barua #else
23951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2403c3a9c75SAmlan Barua #endif
241b2b77a04SHong Zhang       break;
242b2b77a04SHong Zhang     default:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2453c3a9c75SAmlan Barua #else
2463c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     }
250b2b77a04SHong Zhang     fftw->finarray  = x_array;
251b2b77a04SHong Zhang     fftw->foutarray = y_array;
252b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
253b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
254b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
255b2b77a04SHong Zhang   } else { /* use existing plan */
256b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
257b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
258b2b77a04SHong Zhang     } else {
259b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
260b2b77a04SHong Zhang     }
261b2b77a04SHong Zhang   }
262b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
263b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
264b2b77a04SHong Zhang   PetscFunctionReturn(0);
265b2b77a04SHong Zhang }
266b2b77a04SHong Zhang 
26797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
26897504071SAmlan Barua    Input parameter:
2693564f093SHong Zhang      A - the matrix
27097504071SAmlan Barua      x - the vector on which BDFT will be performed
27197504071SAmlan Barua 
27297504071SAmlan Barua    Output parameter:
27397504071SAmlan Barua      y - vector that stores result of BDFT
27497504071SAmlan Barua */
275b2b77a04SHong Zhang #undef __FUNCT__
276b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
277b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
278b2b77a04SHong Zhang {
279b2b77a04SHong Zhang   PetscErrorCode ierr;
280b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
281b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
282b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
283c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
284ce94432eSBarry Smith   MPI_Comm       comm;
285b2b77a04SHong Zhang 
286b2b77a04SHong Zhang   PetscFunctionBegin;
287ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
288b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
289b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
290b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
291b2b77a04SHong Zhang     switch (ndim) {
292b2b77a04SHong Zhang     case 1:
2933c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
294b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
295ae0a50aaSHong Zhang #else
2964f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2973c3a9c75SAmlan Barua #endif
298b2b77a04SHong Zhang       break;
299b2b77a04SHong Zhang     case 2:
30097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
301b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3023c3a9c75SAmlan Barua #else
3033c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3043c3a9c75SAmlan Barua #endif
305b2b77a04SHong Zhang       break;
306b2b77a04SHong Zhang     case 3:
3073c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
308b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3093c3a9c75SAmlan Barua #else
3103c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3113c3a9c75SAmlan Barua #endif
312b2b77a04SHong Zhang       break;
313b2b77a04SHong Zhang     default:
3143c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
315c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
316*8c1d5ab3SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3173c3a9c75SAmlan Barua #else
3183c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3193c3a9c75SAmlan Barua #endif
320b2b77a04SHong Zhang       break;
321b2b77a04SHong Zhang     }
322b2b77a04SHong Zhang     fftw->binarray  = x_array;
323b2b77a04SHong Zhang     fftw->boutarray = y_array;
324b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
325b2b77a04SHong Zhang   } else { /* use existing plan */
326b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
327b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
328b2b77a04SHong Zhang     } else {
329b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
330b2b77a04SHong Zhang     }
331b2b77a04SHong Zhang   }
332b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
333b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
334b2b77a04SHong Zhang   PetscFunctionReturn(0);
335b2b77a04SHong Zhang }
336b2b77a04SHong Zhang 
337b2b77a04SHong Zhang #undef __FUNCT__
338b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
339b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
340b2b77a04SHong Zhang {
341b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
342b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
343b2b77a04SHong Zhang   PetscErrorCode ierr;
344b2b77a04SHong Zhang 
345b2b77a04SHong Zhang   PetscFunctionBegin;
346b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
347b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
348b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
349*8c1d5ab3SHong Zhang   if (fftw->iodims) {
350*8c1d5ab3SHong Zhang     /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */
351*8c1d5ab3SHong Zhang     free(fftw->iodims);
352*8c1d5ab3SHong Zhang   }
353bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3546ccf0b3eSHong Zhang   fftw_mpi_cleanup();
355b2b77a04SHong Zhang   PetscFunctionReturn(0);
356b2b77a04SHong Zhang }
357b2b77a04SHong Zhang 
358c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
359b2b77a04SHong Zhang #undef __FUNCT__
360b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
361b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
362b2b77a04SHong Zhang {
363b2b77a04SHong Zhang   PetscErrorCode ierr;
364b2b77a04SHong Zhang   PetscScalar    *array;
365b2b77a04SHong Zhang 
366b2b77a04SHong Zhang   PetscFunctionBegin;
367b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
368b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
369b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
370b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
371b2b77a04SHong Zhang   PetscFunctionReturn(0);
372b2b77a04SHong Zhang }
373b2b77a04SHong Zhang 
3744f7415efSAmlan Barua #undef __FUNCT__
3754be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3764be45526SHong Zhang /*@
377b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3784f7415efSAmlan Barua      parallel layout determined by FFTW
3794f7415efSAmlan Barua 
3804f7415efSAmlan Barua    Collective on Mat
3814f7415efSAmlan Barua 
3824f7415efSAmlan Barua    Input Parameter:
3833564f093SHong Zhang .   A - the matrix
3844f7415efSAmlan Barua 
3854f7415efSAmlan Barua    Output Parameter:
386cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
387cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
388cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3894f7415efSAmlan Barua 
3904f7415efSAmlan Barua   Level: advanced
3913564f093SHong Zhang 
39297504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
39397504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
39497504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
39597504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
39697504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
39797504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
398e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
399e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
400e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
401e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
402e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
403e0ec536eSAmlan Barua         each processor and returns that.
4044f7415efSAmlan Barua 
4054f7415efSAmlan Barua .seealso: MatCreateFFTW()
4064be45526SHong Zhang @*/
4074be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4084be45526SHong Zhang {
4094be45526SHong Zhang   PetscErrorCode ierr;
410b4c3921fSHong Zhang 
4114be45526SHong Zhang   PetscFunctionBegin;
4124be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4134be45526SHong Zhang   PetscFunctionReturn(0);
4144be45526SHong Zhang }
4154be45526SHong Zhang 
4164be45526SHong Zhang #undef __FUNCT__
4174be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4184be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4194f7415efSAmlan Barua {
4204f7415efSAmlan Barua   PetscErrorCode ierr;
4214f7415efSAmlan Barua   PetscMPIInt    size,rank;
422ce94432eSBarry Smith   MPI_Comm       comm;
4234f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4244f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4259496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4264f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4274f7415efSAmlan Barua 
4284f7415efSAmlan Barua   PetscFunctionBegin;
4294f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4304f7415efSAmlan Barua   PetscValidType(A,1);
431ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4324f7415efSAmlan Barua 
4334f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4344f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4354f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4364f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4374f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4384f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4394f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4404f7415efSAmlan Barua #else
4418ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4428ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4438ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4444f7415efSAmlan Barua #endif
44597504071SAmlan Barua   } else { /* parallel cases */
4464f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4479496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4489496c9aeSAmlan Barua     fftw_complex *data_fout;
4499496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4509496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4519496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4529496c9aeSAmlan Barua #else
4534f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4546ccf0b3eSHong Zhang     PetscInt  n1,N1;
4559496c9aeSAmlan Barua     ptrdiff_t temp;
4569496c9aeSAmlan Barua #endif
4579496c9aeSAmlan Barua 
4584f7415efSAmlan Barua     switch (ndim) {
4594f7415efSAmlan Barua     case 1:
46057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4614f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
46257625b84SAmlan Barua #else
46357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
46457625b84SAmlan Barua       if (fin) {
46557625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
466778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
46726fbe8dcSKarl Rupp 
46857625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
46957625b84SAmlan Barua       }
47057625b84SAmlan Barua       if (fout) {
47157625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
472778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
47326fbe8dcSKarl Rupp 
47457625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
47557625b84SAmlan Barua       }
47657625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
47757625b84SAmlan Barua       if (bout) {
47857625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
48026fbe8dcSKarl Rupp 
48157625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
48257625b84SAmlan Barua       }
48357625b84SAmlan Barua       break;
48457625b84SAmlan Barua #endif
4854f7415efSAmlan Barua     case 2:
48697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4874f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4884f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4894f7415efSAmlan Barua       if (fin) {
4904f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
491778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
49226fbe8dcSKarl Rupp 
4934f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4944f7415efSAmlan Barua       }
4954f7415efSAmlan Barua       if (bout) {
4964f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
497778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
49826fbe8dcSKarl Rupp 
4994f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5004f7415efSAmlan Barua       }
50157625b84SAmlan Barua       if (fout) {
50257625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
503778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50426fbe8dcSKarl Rupp 
50557625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
50657625b84SAmlan Barua       }
5074f7415efSAmlan Barua #else
5084f7415efSAmlan Barua       /* Get local size */
5094f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5104f7415efSAmlan Barua       if (fin) {
5114f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
512778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
51326fbe8dcSKarl Rupp 
5144f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5154f7415efSAmlan Barua       }
5164f7415efSAmlan Barua       if (fout) {
5174f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
518778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51926fbe8dcSKarl Rupp 
5204f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5214f7415efSAmlan Barua       }
5224f7415efSAmlan Barua       if (bout) {
5234f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
524778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
52526fbe8dcSKarl Rupp 
5264f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5274f7415efSAmlan Barua       }
5284f7415efSAmlan Barua #endif
5294f7415efSAmlan Barua       break;
5304f7415efSAmlan Barua     case 3:
5314f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5324f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5334f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5344f7415efSAmlan Barua       if (fin) {
5354f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
536778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
53726fbe8dcSKarl Rupp 
5384f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5394f7415efSAmlan Barua       }
5404f7415efSAmlan Barua       if (bout) {
5414f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
542778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
54326fbe8dcSKarl Rupp 
5444f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5454f7415efSAmlan Barua       }
5463564f093SHong Zhang 
54757625b84SAmlan Barua       if (fout) {
54857625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
549778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55026fbe8dcSKarl Rupp 
55157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55257625b84SAmlan Barua       }
5534f7415efSAmlan Barua #else
5540c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5550c9b18e4SAmlan Barua       if (fin) {
5560c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
557778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
55826fbe8dcSKarl Rupp 
5590c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5600c9b18e4SAmlan Barua       }
5610c9b18e4SAmlan Barua       if (fout) {
5620c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
56426fbe8dcSKarl Rupp 
5650c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5660c9b18e4SAmlan Barua       }
5670c9b18e4SAmlan Barua       if (bout) {
5680c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
569778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
57026fbe8dcSKarl Rupp 
5710c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5720c9b18e4SAmlan Barua       }
5734f7415efSAmlan Barua #endif
5744f7415efSAmlan Barua       break;
5754f7415efSAmlan Barua     default:
5764f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5774f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
57826fbe8dcSKarl Rupp 
5794f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
58026fbe8dcSKarl Rupp 
5814f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5824f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
58326fbe8dcSKarl Rupp 
5844f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5854f7415efSAmlan Barua 
5864f7415efSAmlan Barua       if (fin) {
5874f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
588778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
58926fbe8dcSKarl Rupp 
5904f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5914f7415efSAmlan Barua       }
5924f7415efSAmlan Barua       if (bout) {
5934f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
594778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
59526fbe8dcSKarl Rupp 
5969496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5974f7415efSAmlan Barua       }
5983564f093SHong Zhang 
59957625b84SAmlan Barua       if (fout) {
60057625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
601778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
60226fbe8dcSKarl Rupp 
60357625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
60457625b84SAmlan Barua       }
6054f7415efSAmlan Barua #else
6060c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6070c9b18e4SAmlan Barua       if (fin) {
6080c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
609778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
61026fbe8dcSKarl Rupp 
6110c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6120c9b18e4SAmlan Barua       }
6130c9b18e4SAmlan Barua       if (fout) {
6140c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
615778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
61626fbe8dcSKarl Rupp 
6170c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6180c9b18e4SAmlan Barua       }
6190c9b18e4SAmlan Barua       if (bout) {
6200c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
621778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
62226fbe8dcSKarl Rupp 
6230c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6240c9b18e4SAmlan Barua       }
6254f7415efSAmlan Barua #endif
6264f7415efSAmlan Barua       break;
6274f7415efSAmlan Barua     }
6284f7415efSAmlan Barua   }
6294f7415efSAmlan Barua   PetscFunctionReturn(0);
6304f7415efSAmlan Barua }
6314f7415efSAmlan Barua 
632c92418ccSAmlan Barua #undef __FUNCT__
63374a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6343564f093SHong Zhang /*@
6353564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
63654efbe56SHong Zhang 
6373564f093SHong Zhang    Collective on Mat
6383564f093SHong Zhang 
6393564f093SHong Zhang    Input Parameters:
6403564f093SHong Zhang +  A - FFTW matrix
6413564f093SHong Zhang -  x - the PETSc vector
6423564f093SHong Zhang 
6433564f093SHong Zhang    Output Parameters:
6443564f093SHong Zhang .  y - the FFTW vector
6453564f093SHong Zhang 
646b2b77a04SHong Zhang   Options Database Keys:
6473564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
648b2b77a04SHong Zhang 
649b2b77a04SHong Zhang    Level: intermediate
6503564f093SHong Zhang 
65197504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
65297504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
65397504071SAmlan Barua          zeros. This routine does that job by scattering operation.
65497504071SAmlan Barua 
6553564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6563564f093SHong Zhang @*/
6573564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6583564f093SHong Zhang {
6593564f093SHong Zhang   PetscErrorCode ierr;
660b2b77a04SHong Zhang 
6613564f093SHong Zhang   PetscFunctionBegin;
6623564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6633564f093SHong Zhang   PetscFunctionReturn(0);
6643564f093SHong Zhang }
6653c3a9c75SAmlan Barua 
6663c3a9c75SAmlan Barua #undef __FUNCT__
6671986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
66874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6693c3a9c75SAmlan Barua {
6703c3a9c75SAmlan Barua   PetscErrorCode ierr;
671ce94432eSBarry Smith   MPI_Comm       comm;
6723c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6733c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6749496c9aeSAmlan Barua   PetscInt       N     =fft->N;
675b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6769496c9aeSAmlan Barua   PetscInt       low;
6777a21806cSHong Zhang   PetscMPIInt    rank,size;
6787a21806cSHong Zhang   PetscInt       vsize,vsize1;
6797a21806cSHong Zhang   //ptrdiff_t      alloc_local,local_n0,local_0_start;
6807a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6819496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6823c3a9c75SAmlan Barua   VecScatter     vecscat;
6833c3a9c75SAmlan Barua   IS             list1,list2;
6849496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6859496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6869496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6879496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6889496c9aeSAmlan Barua   ptrdiff_t      temp;
6899496c9aeSAmlan Barua #endif
6903c3a9c75SAmlan Barua 
6913564f093SHong Zhang   PetscFunctionBegin;
692ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
693f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
694f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6950298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
6963c3a9c75SAmlan Barua 
6973564f093SHong Zhang   if (size==1) {
6988ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6998ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7009496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7016971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7026971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7036971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7046971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
705b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7063564f093SHong Zhang   } else {
7073c3a9c75SAmlan Barua     switch (ndim) {
7083c3a9c75SAmlan Barua     case 1:
70964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7107a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
71126fbe8dcSKarl Rupp 
7124f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7134f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
71464657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
71564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
71664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
71764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
71864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
71964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
72064657d84SAmlan Barua #else
721e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
72264657d84SAmlan Barua #endif
7233c3a9c75SAmlan Barua       break;
7243c3a9c75SAmlan Barua     case 2:
725bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7267a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
72726fbe8dcSKarl Rupp 
7284f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7294f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
730bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
731bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
732bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
733bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
734bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
735bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
736bd59e6c6SAmlan Barua #else
7373c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
73826fbe8dcSKarl Rupp 
7393c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7409496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7419496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7423c3a9c75SAmlan Barua 
7433564f093SHong Zhang       if (dim[1]%2==0) {
7443c3a9c75SAmlan Barua         NM = dim[1]+2;
7453564f093SHong Zhang       } else {
7463c3a9c75SAmlan Barua         NM = dim[1]+1;
7473564f093SHong Zhang       }
7483c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7493c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7503c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7513c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
75226fbe8dcSKarl Rupp 
7535b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7543c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7553c3a9c75SAmlan Barua         }
7563c3a9c75SAmlan Barua       }
7573c3a9c75SAmlan Barua 
7583c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7593c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7603c3a9c75SAmlan Barua 
761f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
762f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
765b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
766b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
767b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
768b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
769bd59e6c6SAmlan Barua #endif
7709496c9aeSAmlan Barua       break;
7713c3a9c75SAmlan Barua 
7723c3a9c75SAmlan Barua     case 3:
773bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7747a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
77526fbe8dcSKarl Rupp 
7764f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7774f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
778bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
779bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
780bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
781bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
782bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
783bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
784bd59e6c6SAmlan Barua #else
7857a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
78626fbe8dcSKarl Rupp 
78751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
78851d1eed7SAmlan Barua 
7899496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7909496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
79151d1eed7SAmlan Barua 
792db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
793db4deed7SKarl Rupp       else             NM = dim[2]+1;
79451d1eed7SAmlan Barua 
79551d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
79651d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
79751d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
79851d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
79951d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
80026fbe8dcSKarl Rupp 
80151d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
80251d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
80351d1eed7SAmlan Barua           }
80451d1eed7SAmlan Barua         }
80551d1eed7SAmlan Barua       }
80651d1eed7SAmlan Barua 
80751d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
80851d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
80951d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
81051d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81151d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81251d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
813b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
814b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
815b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
816b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
817bd59e6c6SAmlan Barua #endif
8189496c9aeSAmlan Barua       break;
8193c3a9c75SAmlan Barua 
8203c3a9c75SAmlan Barua     default:
821bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8227a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
82326fbe8dcSKarl Rupp 
8244f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8254f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
826bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
827bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
830bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
831bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
832bd59e6c6SAmlan Barua #else
833e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
83426fbe8dcSKarl Rupp 
835e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
83626fbe8dcSKarl Rupp 
8377a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
83826fbe8dcSKarl Rupp 
839e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
84026fbe8dcSKarl Rupp 
841e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
842e5d7f247SAmlan Barua 
843e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
844e5d7f247SAmlan Barua 
845e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
846e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
847e5d7f247SAmlan Barua 
848db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
849db4deed7SKarl Rupp       else                  NM = 1;
850e5d7f247SAmlan Barua 
8516971673cSAmlan Barua       j = low;
8523564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8536971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8546971673cSAmlan Barua         indx2[i] = j;
85526fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8566971673cSAmlan Barua         j++;
8576971673cSAmlan Barua       }
8586971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8596971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8606971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8616971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8626971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8636971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
864b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
865b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
866b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
867b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
868bd59e6c6SAmlan Barua #endif
8699496c9aeSAmlan Barua       break;
8703c3a9c75SAmlan Barua     }
871e81bb599SAmlan Barua   }
8723564f093SHong Zhang   PetscFunctionReturn(0);
8733c3a9c75SAmlan Barua }
8743c3a9c75SAmlan Barua 
8753c3a9c75SAmlan Barua #undef __FUNCT__
87674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8773564f093SHong Zhang /*@
8783564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8793564f093SHong Zhang 
8803564f093SHong Zhang    Collective on Mat
8813564f093SHong Zhang 
8823564f093SHong Zhang     Input Parameters:
8833564f093SHong Zhang +   A - FFTW matrix
8843564f093SHong Zhang -   x - FFTW vector
8853564f093SHong Zhang 
8863564f093SHong Zhang    Output Parameters:
8873564f093SHong Zhang .  y - PETSc vector
8883564f093SHong Zhang 
8893564f093SHong Zhang    Level: intermediate
8903564f093SHong Zhang 
8913564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8923564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8933564f093SHong Zhang 
8943564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8953564f093SHong Zhang @*/
89674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8973c3a9c75SAmlan Barua {
8983c3a9c75SAmlan Barua   PetscErrorCode ierr;
8995fd66863SKarl Rupp 
9003c3a9c75SAmlan Barua   PetscFunctionBegin;
90174a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9023c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9033c3a9c75SAmlan Barua }
90454efbe56SHong Zhang 
9053c3a9c75SAmlan Barua #undef __FUNCT__
90674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
90774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9085b3e41ffSAmlan Barua {
9095b3e41ffSAmlan Barua   PetscErrorCode ierr;
910ce94432eSBarry Smith   MPI_Comm       comm;
9115b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9125b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9139496c9aeSAmlan Barua   PetscInt       N     = fft->N;
914b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9159496c9aeSAmlan Barua   PetscInt       low;
9167a21806cSHong Zhang   PetscMPIInt    rank,size;
9177a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9189496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9195b3e41ffSAmlan Barua   VecScatter     vecscat;
9205b3e41ffSAmlan Barua   IS             list1,list2;
9219496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9229496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
9239496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
9249496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
9259496c9aeSAmlan Barua   ptrdiff_t temp;
9269496c9aeSAmlan Barua #endif
9279496c9aeSAmlan Barua 
9283564f093SHong Zhang   PetscFunctionBegin;
929ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9305b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9315b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9320298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9335b3e41ffSAmlan Barua 
934e81bb599SAmlan Barua   if (size==1) {
935b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9366971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9376971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9386971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9396971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
940b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
941e81bb599SAmlan Barua 
9423564f093SHong Zhang   } else {
943e81bb599SAmlan Barua     switch (ndim) {
944e81bb599SAmlan Barua     case 1:
94564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9467a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
94726fbe8dcSKarl Rupp 
9484f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9494f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
95064657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
95164657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95264657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95364657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
95464657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
95564657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
95664657d84SAmlan Barua #else
9576a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
95864657d84SAmlan Barua #endif
9595b3e41ffSAmlan Barua       break;
9605b3e41ffSAmlan Barua     case 2:
961bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9627a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
96326fbe8dcSKarl Rupp 
9644f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9654f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
966bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
967bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
970bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
971bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
972bd59e6c6SAmlan Barua #else
9737a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
97426fbe8dcSKarl Rupp 
9755b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9765b3e41ffSAmlan Barua 
9779496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9789496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9795b3e41ffSAmlan Barua 
980db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
981db4deed7SKarl Rupp       else             NM = dim[1]+1;
9825b3e41ffSAmlan Barua 
9835b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9845b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9855b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9865b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
98726fbe8dcSKarl Rupp 
9885b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9895b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9905b3e41ffSAmlan Barua         }
9915b3e41ffSAmlan Barua       }
9925b3e41ffSAmlan Barua 
9935b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9945b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9955b3e41ffSAmlan Barua 
9965b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9975b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9985b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9995b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1000b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1001b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1002b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1003b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1004bd59e6c6SAmlan Barua #endif
10059496c9aeSAmlan Barua       break;
10065b3e41ffSAmlan Barua     case 3:
1007bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10087a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
100926fbe8dcSKarl Rupp 
10104f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10114f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1012bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1013bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1016bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1017bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1018bd59e6c6SAmlan Barua #else
1019bd59e6c6SAmlan Barua 
10207a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
102126fbe8dcSKarl Rupp 
102251d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
102351d1eed7SAmlan Barua 
10249496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10259496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
102651d1eed7SAmlan Barua 
1027db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1028db4deed7SKarl Rupp       else             NM = dim[2]+1;
102951d1eed7SAmlan Barua 
103051d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
103151d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
103251d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
103351d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
103451d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
103526fbe8dcSKarl Rupp 
103651d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
103751d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
103851d1eed7SAmlan Barua           }
103951d1eed7SAmlan Barua         }
104051d1eed7SAmlan Barua       }
104151d1eed7SAmlan Barua 
104251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
104351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
104451d1eed7SAmlan Barua 
104551d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
104651d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104751d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104851d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1049b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1050b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1051b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1052b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1053bd59e6c6SAmlan Barua #endif
10549496c9aeSAmlan Barua       break;
10555b3e41ffSAmlan Barua     default:
1056bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10577a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
105826fbe8dcSKarl Rupp 
10594f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10604f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1061bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1062bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1065bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1066bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1067bd59e6c6SAmlan Barua #else
1068ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
106926fbe8dcSKarl Rupp 
1070ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
107126fbe8dcSKarl Rupp 
1072ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
107326fbe8dcSKarl Rupp 
1074ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
107526fbe8dcSKarl Rupp 
1076ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1077ba6e06d0SAmlan Barua 
1078ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1079ba6e06d0SAmlan Barua 
1080ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1081ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1082ba6e06d0SAmlan Barua 
1083db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1084db4deed7SKarl Rupp       else                  NM = 1;
1085ba6e06d0SAmlan Barua 
1086ba6e06d0SAmlan Barua       j = low;
10873564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1088ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1089ba6e06d0SAmlan Barua         indx2[i] = j;
10903564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1091ba6e06d0SAmlan Barua         j++;
1092ba6e06d0SAmlan Barua       }
1093ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1094ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1095ba6e06d0SAmlan Barua 
1096ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1097ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1098ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1100b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1101b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1102b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1103b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1104bd59e6c6SAmlan Barua #endif
11059496c9aeSAmlan Barua       break;
11065b3e41ffSAmlan Barua     }
1107e81bb599SAmlan Barua   }
11083564f093SHong Zhang   PetscFunctionReturn(0);
11095b3e41ffSAmlan Barua }
11105b3e41ffSAmlan Barua 
11115b3e41ffSAmlan Barua #undef __FUNCT__
11123c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11133c3a9c75SAmlan Barua /*
11143564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11153564f093SHong Zhang 
11163c3a9c75SAmlan Barua   Options Database Keys:
11173c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11183c3a9c75SAmlan Barua 
11193c3a9c75SAmlan Barua    Level: intermediate
11203c3a9c75SAmlan Barua 
11213c3a9c75SAmlan Barua */
11228cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1123b2b77a04SHong Zhang {
1124b2b77a04SHong Zhang   PetscErrorCode ierr;
1125ce94432eSBarry Smith   MPI_Comm       comm;
1126b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1127b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1128b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11295274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11305274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1131b2b77a04SHong Zhang   PetscBool      flg;
11324f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
113311902ff2SHong Zhang   PetscMPIInt    size,rank;
11349496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11359496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11369496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11379496c9aeSAmlan Barua   ptrdiff_t temp;
11388ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11394f48bc67SAmlan Barua #else
11404f48bc67SAmlan Barua   PetscInt n1;
11419496c9aeSAmlan Barua #endif
11429496c9aeSAmlan Barua 
1143b2b77a04SHong Zhang   PetscFunctionBegin;
1144ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1145b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
114611902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1147c92418ccSAmlan Barua 
11481878d478SAmlan Barua   fftw_mpi_init();
114911902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
115011902ff2SHong Zhang   pdim[0] = dim[0];
11518ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11528ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11538ca4c034SAmlan Barua #endif
11543564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11556882372aSHong Zhang     partial_dim *= dim[ctr];
115611902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11578ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1158db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1159db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11608ca4c034SAmlan Barua #endif
11616882372aSHong Zhang   }
11623c3a9c75SAmlan Barua 
1163b2b77a04SHong Zhang   if (size == 1) {
1164e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1165b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1166b2b77a04SHong Zhang     n    = N;
1167e81bb599SAmlan Barua #else
1168e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1169e81bb599SAmlan Barua     n    = tot_dim;
1170e81bb599SAmlan Barua #endif
1171e81bb599SAmlan Barua 
1172b2b77a04SHong Zhang   } else {
11737a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1174b2b77a04SHong Zhang     switch (ndim) {
1175b2b77a04SHong Zhang     case 1:
11763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11773c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1178e5d7f247SAmlan Barua #else
11797a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
118026fbe8dcSKarl Rupp 
11816882372aSHong Zhang       n    = (PetscInt)local_n0;
11829496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11839496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1184e5d7f247SAmlan Barua #endif
1185b2b77a04SHong Zhang       break;
1186b2b77a04SHong Zhang     case 2:
11875b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
11887a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1189b2b77a04SHong Zhang       /*
1190b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11910ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1192b2b77a04SHong Zhang        */
1193b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1194b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11955b3e41ffSAmlan Barua #else
11964f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
119726fbe8dcSKarl Rupp 
11985b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1199c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12005b3e41ffSAmlan Barua #endif
1201b2b77a04SHong Zhang       break;
1202b2b77a04SHong Zhang     case 3:
120351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12047a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
120526fbe8dcSKarl Rupp 
12066882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12076882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
120851d1eed7SAmlan Barua #else
12094f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
121026fbe8dcSKarl Rupp 
121151d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1212c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
121351d1eed7SAmlan Barua #endif
1214b2b77a04SHong Zhang       break;
1215b2b77a04SHong Zhang     default:
1216b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12177a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
121826fbe8dcSKarl Rupp 
12196882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12206882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1221b3a17365SAmlan Barua #else
1222b3a17365SAmlan Barua       temp = pdim[ndim-1];
122326fbe8dcSKarl Rupp 
1224b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
122526fbe8dcSKarl Rupp 
12264f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
122726fbe8dcSKarl Rupp 
1228b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1229b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
123026fbe8dcSKarl Rupp 
1231b3a17365SAmlan Barua       pdim[ndim-1] = temp;
123226fbe8dcSKarl Rupp 
1233c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1234b3a17365SAmlan Barua #endif
1235b2b77a04SHong Zhang       break;
1236b2b77a04SHong Zhang     }
1237b2b77a04SHong Zhang   }
1238b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1239b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1240b2b77a04SHong Zhang   fft->data = (void*)fftw;
1241b2b77a04SHong Zhang 
1242b2b77a04SHong Zhang   fft->n            = n;
12430c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1244e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
124526fbe8dcSKarl Rupp 
12465e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1247*8c1d5ab3SHong Zhang   if (size == 1) {
1248*8c1d5ab3SHong Zhang     /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr);  error! */
1249*8c1d5ab3SHong Zhang     /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */
1250*8c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1251*8c1d5ab3SHong Zhang   }
125226fbe8dcSKarl Rupp 
1253b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1254c92418ccSAmlan Barua 
1255b2b77a04SHong Zhang   fftw->p_forward  = 0;
1256b2b77a04SHong Zhang   fftw->p_backward = 0;
1257b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1258b2b77a04SHong Zhang 
1259b2b77a04SHong Zhang   if (size == 1) {
1260b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1261b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1262b2b77a04SHong Zhang   } else {
1263b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1264b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1265b2b77a04SHong Zhang   }
1266b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1267b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12684a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
126926fbe8dcSKarl Rupp 
1270bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1271bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1272bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1273b2b77a04SHong Zhang 
1274b2b77a04SHong Zhang   /* get runtime options */
1275ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12765274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12775274ed1bSHong Zhang   if (flg) {
12785274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12795274ed1bSHong Zhang   }
12804a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1281b2b77a04SHong Zhang   PetscFunctionReturn(0);
1282b2b77a04SHong Zhang }
12833c3a9c75SAmlan Barua 
12843c3a9c75SAmlan Barua 
12853c3a9c75SAmlan Barua 
12863c3a9c75SAmlan Barua 
1287