xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision bb5bf6f60c56bbb7c8cb738533978661b6e0efd0)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
148c1d5ab3SHong Zhang   fftw_iodim  *iodims;
15e5d7f247SAmlan Barua   PetscInt    partial_dim;
16b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
17b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
18b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
19b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
20b2b77a04SHong Zhang } Mat_FFTW;
21b2b77a04SHong Zhang 
22b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
26b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
27b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
28b2b77a04SHong Zhang 
2997504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3097504071SAmlan Barua    Input parameter:
313564f093SHong Zhang      A - the matrix
3297504071SAmlan Barua      x - the vector on which FDFT will be performed
3397504071SAmlan Barua 
3497504071SAmlan Barua    Output parameter:
3597504071SAmlan Barua      y - vector that stores result of FDFT
3697504071SAmlan Barua */
37b2b77a04SHong Zhang #undef __FUNCT__
38b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
39b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
40b2b77a04SHong Zhang {
41b2b77a04SHong Zhang   PetscErrorCode ierr;
42b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
43b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
44b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
458c1d5ab3SHong Zhang   fftw_iodim     *iodims;
46*bb5bf6f6SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
47b2b77a04SHong Zhang 
48b2b77a04SHong Zhang   PetscFunctionBegin;
49b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
52b2b77a04SHong Zhang     switch (ndim) {
53b2b77a04SHong Zhang     case 1:
5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5658a912c5SAmlan Barua #else
5758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
5858a912c5SAmlan Barua #endif
59b2b77a04SHong Zhang       break;
60b2b77a04SHong Zhang     case 2:
6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
62b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6358a912c5SAmlan Barua #else
6458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6558a912c5SAmlan Barua #endif
66b2b77a04SHong Zhang       break;
67b2b77a04SHong Zhang     case 3:
6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
69b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7058a912c5SAmlan Barua #else
7158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7258a912c5SAmlan Barua #endif
73b2b77a04SHong Zhang       break;
74b2b77a04SHong Zhang     default:
7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
768c1d5ab3SHong Zhang       iodims = (fftw_iodim*)fftw->iodims;
778c1d5ab3SHong Zhang       if (ndim) {
788c1d5ab3SHong Zhang         iodims[ndim-1].n = dim[ndim-1];
798c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
808c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
818c1d5ab3SHong Zhang           iodims[i].n = dim[i];
828c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
838c1d5ab3SHong Zhang         }
848c1d5ab3SHong Zhang       }
858c1d5ab3SHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8658a912c5SAmlan Barua #else
8758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8858a912c5SAmlan Barua #endif
89b2b77a04SHong Zhang       break;
90b2b77a04SHong Zhang     }
91b2b77a04SHong Zhang     fftw->finarray  = x_array;
92b2b77a04SHong Zhang     fftw->foutarray = y_array;
93b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
94b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
95b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
96b2b77a04SHong Zhang   } else { /* use existing plan */
97b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
98b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
99b2b77a04SHong Zhang     } else {
100b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
101b2b77a04SHong Zhang     }
102b2b77a04SHong Zhang   }
103b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
104b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
105b2b77a04SHong Zhang   PetscFunctionReturn(0);
106b2b77a04SHong Zhang }
107b2b77a04SHong Zhang 
10897504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
10997504071SAmlan Barua    Input parameter:
1103564f093SHong Zhang      A - the matrix
11197504071SAmlan Barua      x - the vector on which BDFT will be performed
11297504071SAmlan Barua 
11397504071SAmlan Barua    Output parameter:
11497504071SAmlan Barua      y - vector that stores result of BDFT
11597504071SAmlan Barua */
11697504071SAmlan Barua 
117b2b77a04SHong Zhang #undef __FUNCT__
118b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
119b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
120b2b77a04SHong Zhang {
121b2b77a04SHong Zhang   PetscErrorCode ierr;
122b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
123b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
124b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
125b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1268c1d5ab3SHong Zhang   fftw_iodim     *iodims=(fftw_iodim*)fftw->iodims;
127b2b77a04SHong Zhang 
128b2b77a04SHong Zhang   PetscFunctionBegin;
129b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
130b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
131b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
132b2b77a04SHong Zhang     switch (ndim) {
133b2b77a04SHong Zhang     case 1:
13458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
135b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13658a912c5SAmlan Barua #else
137e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
13858a912c5SAmlan Barua #endif
139b2b77a04SHong Zhang       break;
140b2b77a04SHong Zhang     case 2:
14158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
142b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14358a912c5SAmlan Barua #else
144e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14558a912c5SAmlan Barua #endif
146b2b77a04SHong Zhang       break;
147b2b77a04SHong Zhang     case 3:
14858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
149b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
15058a912c5SAmlan Barua #else
151e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
15258a912c5SAmlan Barua #endif
153b2b77a04SHong Zhang       break;
154b2b77a04SHong Zhang     default:
15558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1568c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
15758a912c5SAmlan Barua #else
158e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
15958a912c5SAmlan Barua #endif
160b2b77a04SHong Zhang       break;
161b2b77a04SHong Zhang     }
162b2b77a04SHong Zhang     fftw->binarray  = x_array;
163b2b77a04SHong Zhang     fftw->boutarray = y_array;
164b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
165b2b77a04SHong Zhang   } else { /* use existing plan */
166b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
167b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
168b2b77a04SHong Zhang     } else {
169b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
170b2b77a04SHong Zhang     }
171b2b77a04SHong Zhang   }
172b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
173b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
174b2b77a04SHong Zhang   PetscFunctionReturn(0);
175b2b77a04SHong Zhang }
176b2b77a04SHong Zhang 
17797504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
17897504071SAmlan Barua    Input parameter:
1793564f093SHong Zhang      A - the matrix
18097504071SAmlan Barua      x - the vector on which FDFT will be performed
18197504071SAmlan Barua 
18297504071SAmlan Barua    Output parameter:
18397504071SAmlan Barua    y   - vector that stores result of FDFT
18497504071SAmlan Barua */
185b2b77a04SHong Zhang #undef __FUNCT__
186b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
187b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
188b2b77a04SHong Zhang {
189b2b77a04SHong Zhang   PetscErrorCode ierr;
190b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
191b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
192b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
193c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
194ce94432eSBarry Smith   MPI_Comm       comm;
195b2b77a04SHong Zhang 
196b2b77a04SHong Zhang   PetscFunctionBegin;
197ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
198b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
199b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
200b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
201b2b77a04SHong Zhang     switch (ndim) {
202b2b77a04SHong Zhang     case 1:
2033c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
204b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
205ae0a50aaSHong Zhang #else
2064f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2073c3a9c75SAmlan Barua #endif
208b2b77a04SHong Zhang       break;
209b2b77a04SHong Zhang     case 2:
21097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
211b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2123c3a9c75SAmlan Barua #else
2133c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2143c3a9c75SAmlan Barua #endif
215b2b77a04SHong Zhang       break;
216b2b77a04SHong Zhang     case 3:
2173c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
218b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2193c3a9c75SAmlan Barua #else
22051d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2213c3a9c75SAmlan Barua #endif
222b2b77a04SHong Zhang       break;
223b2b77a04SHong Zhang     default:
2243c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
225c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2263c3a9c75SAmlan Barua #else
2273c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2283c3a9c75SAmlan Barua #endif
229b2b77a04SHong Zhang       break;
230b2b77a04SHong Zhang     }
231b2b77a04SHong Zhang     fftw->finarray  = x_array;
232b2b77a04SHong Zhang     fftw->foutarray = y_array;
233b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
234b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
235b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
236b2b77a04SHong Zhang   } else { /* use existing plan */
237b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
238b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
239b2b77a04SHong Zhang     } else {
240b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
241b2b77a04SHong Zhang     }
242b2b77a04SHong Zhang   }
243b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
244b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
245b2b77a04SHong Zhang   PetscFunctionReturn(0);
246b2b77a04SHong Zhang }
247b2b77a04SHong Zhang 
24897504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
24997504071SAmlan Barua    Input parameter:
2503564f093SHong Zhang      A - the matrix
25197504071SAmlan Barua      x - the vector on which BDFT will be performed
25297504071SAmlan Barua 
25397504071SAmlan Barua    Output parameter:
25497504071SAmlan Barua      y - vector that stores result of BDFT
25597504071SAmlan Barua */
256b2b77a04SHong Zhang #undef __FUNCT__
257b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
258b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
259b2b77a04SHong Zhang {
260b2b77a04SHong Zhang   PetscErrorCode ierr;
261b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
262b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
263b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
264c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
265ce94432eSBarry Smith   MPI_Comm       comm;
266b2b77a04SHong Zhang 
267b2b77a04SHong Zhang   PetscFunctionBegin;
268ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
269b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
270b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
271b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
272b2b77a04SHong Zhang     switch (ndim) {
273b2b77a04SHong Zhang     case 1:
2743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
275b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
276ae0a50aaSHong Zhang #else
2774f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2783c3a9c75SAmlan Barua #endif
279b2b77a04SHong Zhang       break;
280b2b77a04SHong Zhang     case 2:
28197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
282b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2833c3a9c75SAmlan Barua #else
2843c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2853c3a9c75SAmlan Barua #endif
286b2b77a04SHong Zhang       break;
287b2b77a04SHong Zhang     case 3:
2883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
289b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2903c3a9c75SAmlan Barua #else
2913c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2923c3a9c75SAmlan Barua #endif
293b2b77a04SHong Zhang       break;
294b2b77a04SHong Zhang     default:
2953c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
296c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2973c3a9c75SAmlan Barua #else
2983c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2993c3a9c75SAmlan Barua #endif
300b2b77a04SHong Zhang       break;
301b2b77a04SHong Zhang     }
302b2b77a04SHong Zhang     fftw->binarray  = x_array;
303b2b77a04SHong Zhang     fftw->boutarray = y_array;
304b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305b2b77a04SHong Zhang   } else { /* use existing plan */
306b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
307b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
308b2b77a04SHong Zhang     } else {
309b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
310b2b77a04SHong Zhang     }
311b2b77a04SHong Zhang   }
312b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
313b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
314b2b77a04SHong Zhang   PetscFunctionReturn(0);
315b2b77a04SHong Zhang }
316b2b77a04SHong Zhang 
317b2b77a04SHong Zhang #undef __FUNCT__
318b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
319b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
320b2b77a04SHong Zhang {
321b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
322b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
323b2b77a04SHong Zhang   PetscErrorCode ierr;
324b2b77a04SHong Zhang 
325b2b77a04SHong Zhang   PetscFunctionBegin;
326b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
327b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3288c1d5ab3SHong Zhang   if (fftw->iodims) {
3298c1d5ab3SHong Zhang     /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */
3308c1d5ab3SHong Zhang     free(fftw->iodims);
3318c1d5ab3SHong Zhang   }
332*bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
333bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3346ccf0b3eSHong Zhang   fftw_mpi_cleanup();
335b2b77a04SHong Zhang   PetscFunctionReturn(0);
336b2b77a04SHong Zhang }
337b2b77a04SHong Zhang 
338c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
339b2b77a04SHong Zhang #undef __FUNCT__
340b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
341b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
342b2b77a04SHong Zhang {
343b2b77a04SHong Zhang   PetscErrorCode ierr;
344b2b77a04SHong Zhang   PetscScalar    *array;
345b2b77a04SHong Zhang 
346b2b77a04SHong Zhang   PetscFunctionBegin;
347b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
348b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
349b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
350b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
351b2b77a04SHong Zhang   PetscFunctionReturn(0);
352b2b77a04SHong Zhang }
353b2b77a04SHong Zhang 
3544f7415efSAmlan Barua #undef __FUNCT__
3554be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3564be45526SHong Zhang /*@
357b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3584f7415efSAmlan Barua      parallel layout determined by FFTW
3594f7415efSAmlan Barua 
3604f7415efSAmlan Barua    Collective on Mat
3614f7415efSAmlan Barua 
3624f7415efSAmlan Barua    Input Parameter:
3633564f093SHong Zhang .   A - the matrix
3644f7415efSAmlan Barua 
3654f7415efSAmlan Barua    Output Parameter:
366cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
367cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
368cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3694f7415efSAmlan Barua 
3704f7415efSAmlan Barua   Level: advanced
3713564f093SHong Zhang 
37297504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
37397504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
37497504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
37597504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
37697504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
37797504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
378e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
379e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
380e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
381e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
382e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
383e0ec536eSAmlan Barua         each processor and returns that.
3844f7415efSAmlan Barua 
3854f7415efSAmlan Barua .seealso: MatCreateFFTW()
3864be45526SHong Zhang @*/
3874be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3884be45526SHong Zhang {
3894be45526SHong Zhang   PetscErrorCode ierr;
390b4c3921fSHong Zhang 
3914be45526SHong Zhang   PetscFunctionBegin;
3924be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3934be45526SHong Zhang   PetscFunctionReturn(0);
3944be45526SHong Zhang }
3954be45526SHong Zhang 
3964be45526SHong Zhang #undef __FUNCT__
3974be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3984be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3994f7415efSAmlan Barua {
4004f7415efSAmlan Barua   PetscErrorCode ierr;
4014f7415efSAmlan Barua   PetscMPIInt    size,rank;
402ce94432eSBarry Smith   MPI_Comm       comm;
4034f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4044f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4059496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4064f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4074f7415efSAmlan Barua 
4084f7415efSAmlan Barua   PetscFunctionBegin;
4094f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4104f7415efSAmlan Barua   PetscValidType(A,1);
411ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4124f7415efSAmlan Barua 
4134f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4144f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4154f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4164f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4174f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4184f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4194f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4204f7415efSAmlan Barua #else
4218ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4228ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4238ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4244f7415efSAmlan Barua #endif
42597504071SAmlan Barua   } else { /* parallel cases */
4264f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4279496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4289496c9aeSAmlan Barua     fftw_complex *data_fout;
4299496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4309496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4319496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4329496c9aeSAmlan Barua #else
4334f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4346ccf0b3eSHong Zhang     PetscInt  n1,N1;
4359496c9aeSAmlan Barua     ptrdiff_t temp;
4369496c9aeSAmlan Barua #endif
4379496c9aeSAmlan Barua 
4384f7415efSAmlan Barua     switch (ndim) {
4394f7415efSAmlan Barua     case 1:
44057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4414f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
44257625b84SAmlan Barua #else
44357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44457625b84SAmlan Barua       if (fin) {
44557625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
446778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
44726fbe8dcSKarl Rupp 
44857625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
44957625b84SAmlan Barua       }
45057625b84SAmlan Barua       if (fout) {
45157625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
452778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
45326fbe8dcSKarl Rupp 
45457625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
45557625b84SAmlan Barua       }
45657625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
45757625b84SAmlan Barua       if (bout) {
45857625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
459778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
46026fbe8dcSKarl Rupp 
46157625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
46257625b84SAmlan Barua       }
46357625b84SAmlan Barua       break;
46457625b84SAmlan Barua #endif
4654f7415efSAmlan Barua     case 2:
46697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4674f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4684f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4694f7415efSAmlan Barua       if (fin) {
4704f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
471778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
47226fbe8dcSKarl Rupp 
4734f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4744f7415efSAmlan Barua       }
4754f7415efSAmlan Barua       if (bout) {
4764f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
477778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
47826fbe8dcSKarl Rupp 
4794f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua       }
48157625b84SAmlan Barua       if (fout) {
48257625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48426fbe8dcSKarl Rupp 
48557625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
48657625b84SAmlan Barua       }
4874f7415efSAmlan Barua #else
4884f7415efSAmlan Barua       /* Get local size */
4894f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4904f7415efSAmlan Barua       if (fin) {
4914f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
49326fbe8dcSKarl Rupp 
4944f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4954f7415efSAmlan Barua       }
4964f7415efSAmlan Barua       if (fout) {
4974f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
498778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
49926fbe8dcSKarl Rupp 
5004f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5014f7415efSAmlan Barua       }
5024f7415efSAmlan Barua       if (bout) {
5034f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
50526fbe8dcSKarl Rupp 
5064f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5074f7415efSAmlan Barua       }
5084f7415efSAmlan Barua #endif
5094f7415efSAmlan Barua       break;
5104f7415efSAmlan Barua     case 3:
5114f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5124f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5134f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5144f7415efSAmlan Barua       if (fin) {
5154f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
516778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
51726fbe8dcSKarl Rupp 
5184f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5194f7415efSAmlan Barua       }
5204f7415efSAmlan Barua       if (bout) {
5214f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
522778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
52326fbe8dcSKarl Rupp 
5244f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5254f7415efSAmlan Barua       }
5263564f093SHong Zhang 
52757625b84SAmlan Barua       if (fout) {
52857625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
529778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
53026fbe8dcSKarl Rupp 
53157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
53257625b84SAmlan Barua       }
5334f7415efSAmlan Barua #else
5340c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5350c9b18e4SAmlan Barua       if (fin) {
5360c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
53826fbe8dcSKarl Rupp 
5390c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5400c9b18e4SAmlan Barua       }
5410c9b18e4SAmlan Barua       if (fout) {
5420c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
543778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
54426fbe8dcSKarl Rupp 
5450c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5460c9b18e4SAmlan Barua       }
5470c9b18e4SAmlan Barua       if (bout) {
5480c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
549778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
55026fbe8dcSKarl Rupp 
5510c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5520c9b18e4SAmlan Barua       }
5534f7415efSAmlan Barua #endif
5544f7415efSAmlan Barua       break;
5554f7415efSAmlan Barua     default:
5564f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5574f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
55826fbe8dcSKarl Rupp 
5594f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
56026fbe8dcSKarl Rupp 
5614f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5624f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
56326fbe8dcSKarl Rupp 
5644f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5654f7415efSAmlan Barua 
5664f7415efSAmlan Barua       if (fin) {
5674f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
568778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
56926fbe8dcSKarl Rupp 
5704f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5714f7415efSAmlan Barua       }
5724f7415efSAmlan Barua       if (bout) {
5734f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
574778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
57526fbe8dcSKarl Rupp 
5769496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5774f7415efSAmlan Barua       }
5783564f093SHong Zhang 
57957625b84SAmlan Barua       if (fout) {
58057625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
581778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
58226fbe8dcSKarl Rupp 
58357625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
58457625b84SAmlan Barua       }
5854f7415efSAmlan Barua #else
5860c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5870c9b18e4SAmlan Barua       if (fin) {
5880c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
589778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
59026fbe8dcSKarl Rupp 
5910c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5920c9b18e4SAmlan Barua       }
5930c9b18e4SAmlan Barua       if (fout) {
5940c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
595778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
59626fbe8dcSKarl Rupp 
5970c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5980c9b18e4SAmlan Barua       }
5990c9b18e4SAmlan Barua       if (bout) {
6000c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
601778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
60226fbe8dcSKarl Rupp 
6030c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6040c9b18e4SAmlan Barua       }
6054f7415efSAmlan Barua #endif
6064f7415efSAmlan Barua       break;
6074f7415efSAmlan Barua     }
6084f7415efSAmlan Barua   }
6094f7415efSAmlan Barua   PetscFunctionReturn(0);
6104f7415efSAmlan Barua }
6114f7415efSAmlan Barua 
612c92418ccSAmlan Barua #undef __FUNCT__
61374a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6143564f093SHong Zhang /*@
6153564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
61654efbe56SHong Zhang 
6173564f093SHong Zhang    Collective on Mat
6183564f093SHong Zhang 
6193564f093SHong Zhang    Input Parameters:
6203564f093SHong Zhang +  A - FFTW matrix
6213564f093SHong Zhang -  x - the PETSc vector
6223564f093SHong Zhang 
6233564f093SHong Zhang    Output Parameters:
6243564f093SHong Zhang .  y - the FFTW vector
6253564f093SHong Zhang 
626b2b77a04SHong Zhang   Options Database Keys:
6273564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
628b2b77a04SHong Zhang 
629b2b77a04SHong Zhang    Level: intermediate
6303564f093SHong Zhang 
63197504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
63297504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
63397504071SAmlan Barua          zeros. This routine does that job by scattering operation.
63497504071SAmlan Barua 
6353564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6363564f093SHong Zhang @*/
6373564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6383564f093SHong Zhang {
6393564f093SHong Zhang   PetscErrorCode ierr;
640b2b77a04SHong Zhang 
6413564f093SHong Zhang   PetscFunctionBegin;
6423564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6433564f093SHong Zhang   PetscFunctionReturn(0);
6443564f093SHong Zhang }
6453c3a9c75SAmlan Barua 
6463c3a9c75SAmlan Barua #undef __FUNCT__
6471986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
64874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6493c3a9c75SAmlan Barua {
6503c3a9c75SAmlan Barua   PetscErrorCode ierr;
651ce94432eSBarry Smith   MPI_Comm       comm;
6523c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6533c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6549496c9aeSAmlan Barua   PetscInt       N     =fft->N;
655b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6569496c9aeSAmlan Barua   PetscInt       low;
6577a21806cSHong Zhang   PetscMPIInt    rank,size;
6587a21806cSHong Zhang   PetscInt       vsize,vsize1;
6597a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6609496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6613c3a9c75SAmlan Barua   VecScatter     vecscat;
6623c3a9c75SAmlan Barua   IS             list1,list2;
6639496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6649496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6659496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6669496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6679496c9aeSAmlan Barua   ptrdiff_t      temp;
6689496c9aeSAmlan Barua #endif
6693c3a9c75SAmlan Barua 
6703564f093SHong Zhang   PetscFunctionBegin;
671ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
672f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
673f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6740298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
6753c3a9c75SAmlan Barua 
6763564f093SHong Zhang   if (size==1) {
6778ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6788ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6799496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6806971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6816971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6826971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6836971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
684b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
6853564f093SHong Zhang   } else {
6863c3a9c75SAmlan Barua     switch (ndim) {
6873c3a9c75SAmlan Barua     case 1:
68864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
6897a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
69026fbe8dcSKarl Rupp 
6914f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
6924f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
69364657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
69464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
69764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
69864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
69964657d84SAmlan Barua #else
700e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
70164657d84SAmlan Barua #endif
7023c3a9c75SAmlan Barua       break;
7033c3a9c75SAmlan Barua     case 2:
704bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7057a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
70626fbe8dcSKarl Rupp 
7074f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7084f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
709bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
710bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
711bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
712bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
713bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
714bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
715bd59e6c6SAmlan Barua #else
7163c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
71726fbe8dcSKarl Rupp 
7183c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7199496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7209496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7213c3a9c75SAmlan Barua 
7223564f093SHong Zhang       if (dim[1]%2==0) {
7233c3a9c75SAmlan Barua         NM = dim[1]+2;
7243564f093SHong Zhang       } else {
7253c3a9c75SAmlan Barua         NM = dim[1]+1;
7263564f093SHong Zhang       }
7273c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7283c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7293c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7303c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
73126fbe8dcSKarl Rupp 
7325b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7333c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7343c3a9c75SAmlan Barua         }
7353c3a9c75SAmlan Barua       }
7363c3a9c75SAmlan Barua 
7373c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7383c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7393c3a9c75SAmlan Barua 
740f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
741f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
743f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
744b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
745b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
746b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
747b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
748bd59e6c6SAmlan Barua #endif
7499496c9aeSAmlan Barua       break;
7503c3a9c75SAmlan Barua 
7513c3a9c75SAmlan Barua     case 3:
752bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7537a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
75426fbe8dcSKarl Rupp 
7554f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7564f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
757bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
758bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
759bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
761bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
762bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
763bd59e6c6SAmlan Barua #else
7647a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
76526fbe8dcSKarl Rupp 
76651d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
76751d1eed7SAmlan Barua 
7689496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7699496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
77051d1eed7SAmlan Barua 
771db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
772db4deed7SKarl Rupp       else             NM = dim[2]+1;
77351d1eed7SAmlan Barua 
77451d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
77551d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
77651d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
77751d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
77851d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
77926fbe8dcSKarl Rupp 
78051d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
78151d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
78251d1eed7SAmlan Barua           }
78351d1eed7SAmlan Barua         }
78451d1eed7SAmlan Barua       }
78551d1eed7SAmlan Barua 
78651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
78751d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
78851d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
78951d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79051d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
79151d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
792b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
793b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
794b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
795b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
796bd59e6c6SAmlan Barua #endif
7979496c9aeSAmlan Barua       break;
7983c3a9c75SAmlan Barua 
7993c3a9c75SAmlan Barua     default:
800bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8017a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
80226fbe8dcSKarl Rupp 
8034f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8044f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
805bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
806bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
808bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
809bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
810bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
811bd59e6c6SAmlan Barua #else
812e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
81326fbe8dcSKarl Rupp 
814e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
81526fbe8dcSKarl Rupp 
8167a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
81726fbe8dcSKarl Rupp 
818e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
81926fbe8dcSKarl Rupp 
820e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
821e5d7f247SAmlan Barua 
822e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
823e5d7f247SAmlan Barua 
824e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
825e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
826e5d7f247SAmlan Barua 
827db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
828db4deed7SKarl Rupp       else                  NM = 1;
829e5d7f247SAmlan Barua 
8306971673cSAmlan Barua       j = low;
8313564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8326971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8336971673cSAmlan Barua         indx2[i] = j;
83426fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8356971673cSAmlan Barua         j++;
8366971673cSAmlan Barua       }
8376971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8386971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8396971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8406971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8416971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8426971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
843b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
844b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
845b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
846b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
847bd59e6c6SAmlan Barua #endif
8489496c9aeSAmlan Barua       break;
8493c3a9c75SAmlan Barua     }
850e81bb599SAmlan Barua   }
8513564f093SHong Zhang   PetscFunctionReturn(0);
8523c3a9c75SAmlan Barua }
8533c3a9c75SAmlan Barua 
8543c3a9c75SAmlan Barua #undef __FUNCT__
85574a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8563564f093SHong Zhang /*@
8573564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8583564f093SHong Zhang 
8593564f093SHong Zhang    Collective on Mat
8603564f093SHong Zhang 
8613564f093SHong Zhang     Input Parameters:
8623564f093SHong Zhang +   A - FFTW matrix
8633564f093SHong Zhang -   x - FFTW vector
8643564f093SHong Zhang 
8653564f093SHong Zhang    Output Parameters:
8663564f093SHong Zhang .  y - PETSc vector
8673564f093SHong Zhang 
8683564f093SHong Zhang    Level: intermediate
8693564f093SHong Zhang 
8703564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8713564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8723564f093SHong Zhang 
8733564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8743564f093SHong Zhang @*/
87574a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8763c3a9c75SAmlan Barua {
8773c3a9c75SAmlan Barua   PetscErrorCode ierr;
8785fd66863SKarl Rupp 
8793c3a9c75SAmlan Barua   PetscFunctionBegin;
88074a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8813c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8823c3a9c75SAmlan Barua }
88354efbe56SHong Zhang 
8843c3a9c75SAmlan Barua #undef __FUNCT__
88574a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
88674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8875b3e41ffSAmlan Barua {
8885b3e41ffSAmlan Barua   PetscErrorCode ierr;
889ce94432eSBarry Smith   MPI_Comm       comm;
8905b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
8915b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8929496c9aeSAmlan Barua   PetscInt       N     = fft->N;
893b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
8949496c9aeSAmlan Barua   PetscInt       low;
8957a21806cSHong Zhang   PetscMPIInt    rank,size;
8967a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
8979496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8985b3e41ffSAmlan Barua   VecScatter     vecscat;
8995b3e41ffSAmlan Barua   IS             list1,list2;
9009496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9019496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
9029496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
9039496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
9049496c9aeSAmlan Barua   ptrdiff_t temp;
9059496c9aeSAmlan Barua #endif
9069496c9aeSAmlan Barua 
9073564f093SHong Zhang   PetscFunctionBegin;
908ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9095b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9105b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9110298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9125b3e41ffSAmlan Barua 
913e81bb599SAmlan Barua   if (size==1) {
914b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9156971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9166971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9176971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9186971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
919b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
920e81bb599SAmlan Barua 
9213564f093SHong Zhang   } else {
922e81bb599SAmlan Barua     switch (ndim) {
923e81bb599SAmlan Barua     case 1:
92464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9257a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
92626fbe8dcSKarl Rupp 
9274f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9284f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
92964657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
93064657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93164657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93264657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
93364657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
93464657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
93564657d84SAmlan Barua #else
9366a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
93764657d84SAmlan Barua #endif
9385b3e41ffSAmlan Barua       break;
9395b3e41ffSAmlan Barua     case 2:
940bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9417a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
94226fbe8dcSKarl Rupp 
9434f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9444f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
945bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
946bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
949bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
950bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
951bd59e6c6SAmlan Barua #else
9527a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
95326fbe8dcSKarl Rupp 
9545b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9555b3e41ffSAmlan Barua 
9569496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9579496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9585b3e41ffSAmlan Barua 
959db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
960db4deed7SKarl Rupp       else             NM = dim[1]+1;
9615b3e41ffSAmlan Barua 
9625b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9635b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9645b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9655b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
96626fbe8dcSKarl Rupp 
9675b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9685b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9695b3e41ffSAmlan Barua         }
9705b3e41ffSAmlan Barua       }
9715b3e41ffSAmlan Barua 
9725b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9735b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9745b3e41ffSAmlan Barua 
9755b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9765b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9775b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9785b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
979b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
980b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
981b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
982b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
983bd59e6c6SAmlan Barua #endif
9849496c9aeSAmlan Barua       break;
9855b3e41ffSAmlan Barua     case 3:
986bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9877a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
98826fbe8dcSKarl Rupp 
9894f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
9904f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
991bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
996bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
997bd59e6c6SAmlan Barua #else
998bd59e6c6SAmlan Barua 
9997a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
100026fbe8dcSKarl Rupp 
100151d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
100251d1eed7SAmlan Barua 
10039496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10049496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
100551d1eed7SAmlan Barua 
1006db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1007db4deed7SKarl Rupp       else             NM = dim[2]+1;
100851d1eed7SAmlan Barua 
100951d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
101051d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
101151d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
101251d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
101351d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
101426fbe8dcSKarl Rupp 
101551d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
101651d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
101751d1eed7SAmlan Barua           }
101851d1eed7SAmlan Barua         }
101951d1eed7SAmlan Barua       }
102051d1eed7SAmlan Barua 
102151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
102251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
102351d1eed7SAmlan Barua 
102451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
102551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
102651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
102751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1031b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1032bd59e6c6SAmlan Barua #endif
10339496c9aeSAmlan Barua       break;
10345b3e41ffSAmlan Barua     default:
1035bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10367a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
103726fbe8dcSKarl Rupp 
10384f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10394f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1040bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1041bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1044bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1045bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1046bd59e6c6SAmlan Barua #else
1047ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
104826fbe8dcSKarl Rupp 
1049ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
105026fbe8dcSKarl Rupp 
1051ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
105226fbe8dcSKarl Rupp 
1053ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
105426fbe8dcSKarl Rupp 
1055ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1056ba6e06d0SAmlan Barua 
1057ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1058ba6e06d0SAmlan Barua 
1059ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1060ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1061ba6e06d0SAmlan Barua 
1062db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1063db4deed7SKarl Rupp       else                  NM = 1;
1064ba6e06d0SAmlan Barua 
1065ba6e06d0SAmlan Barua       j = low;
10663564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1067ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1068ba6e06d0SAmlan Barua         indx2[i] = j;
10693564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1070ba6e06d0SAmlan Barua         j++;
1071ba6e06d0SAmlan Barua       }
1072ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1073ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1074ba6e06d0SAmlan Barua 
1075ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1076ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1077ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1078ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1079b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1080b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1081b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1082b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1083bd59e6c6SAmlan Barua #endif
10849496c9aeSAmlan Barua       break;
10855b3e41ffSAmlan Barua     }
1086e81bb599SAmlan Barua   }
10873564f093SHong Zhang   PetscFunctionReturn(0);
10885b3e41ffSAmlan Barua }
10895b3e41ffSAmlan Barua 
10905b3e41ffSAmlan Barua #undef __FUNCT__
10913c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10923c3a9c75SAmlan Barua /*
10933564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
10943564f093SHong Zhang 
10953c3a9c75SAmlan Barua   Options Database Keys:
10963c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10973c3a9c75SAmlan Barua 
10983c3a9c75SAmlan Barua    Level: intermediate
10993c3a9c75SAmlan Barua 
11003c3a9c75SAmlan Barua */
11018cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1102b2b77a04SHong Zhang {
1103b2b77a04SHong Zhang   PetscErrorCode ierr;
1104ce94432eSBarry Smith   MPI_Comm       comm;
1105b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1106b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1107b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11085274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11095274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1110b2b77a04SHong Zhang   PetscBool      flg;
11114f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
111211902ff2SHong Zhang   PetscMPIInt    size,rank;
11139496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11149496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11159496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11169496c9aeSAmlan Barua   ptrdiff_t temp;
11178ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11184f48bc67SAmlan Barua #else
11194f48bc67SAmlan Barua   PetscInt n1;
11209496c9aeSAmlan Barua #endif
11219496c9aeSAmlan Barua 
1122b2b77a04SHong Zhang   PetscFunctionBegin;
1123ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1124b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
112511902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1126c92418ccSAmlan Barua 
11271878d478SAmlan Barua   fftw_mpi_init();
112811902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
112911902ff2SHong Zhang   pdim[0] = dim[0];
11308ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11318ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11328ca4c034SAmlan Barua #endif
11333564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11346882372aSHong Zhang     partial_dim *= dim[ctr];
113511902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11368ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1137db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1138db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11398ca4c034SAmlan Barua #endif
11406882372aSHong Zhang   }
11413c3a9c75SAmlan Barua 
1142b2b77a04SHong Zhang   if (size == 1) {
1143e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1144b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1145b2b77a04SHong Zhang     n    = N;
1146e81bb599SAmlan Barua #else
1147e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1148e81bb599SAmlan Barua     n    = tot_dim;
1149e81bb599SAmlan Barua #endif
1150e81bb599SAmlan Barua 
1151b2b77a04SHong Zhang   } else {
11527a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1153b2b77a04SHong Zhang     switch (ndim) {
1154b2b77a04SHong Zhang     case 1:
11553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11563c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1157e5d7f247SAmlan Barua #else
11587a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
115926fbe8dcSKarl Rupp 
11606882372aSHong Zhang       n    = (PetscInt)local_n0;
11619496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11629496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1163e5d7f247SAmlan Barua #endif
1164b2b77a04SHong Zhang       break;
1165b2b77a04SHong Zhang     case 2:
11665b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
11677a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1168b2b77a04SHong Zhang       /*
1169b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11700ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1171b2b77a04SHong Zhang        */
1172b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1173b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11745b3e41ffSAmlan Barua #else
11754f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
117626fbe8dcSKarl Rupp 
11775b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1178c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11795b3e41ffSAmlan Barua #endif
1180b2b77a04SHong Zhang       break;
1181b2b77a04SHong Zhang     case 3:
118251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11837a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
118426fbe8dcSKarl Rupp 
11856882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11866882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
118751d1eed7SAmlan Barua #else
11884f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
118926fbe8dcSKarl Rupp 
119051d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1191c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
119251d1eed7SAmlan Barua #endif
1193b2b77a04SHong Zhang       break;
1194b2b77a04SHong Zhang     default:
1195b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11967a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
119726fbe8dcSKarl Rupp 
11986882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
11996882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1200b3a17365SAmlan Barua #else
1201b3a17365SAmlan Barua       temp = pdim[ndim-1];
120226fbe8dcSKarl Rupp 
1203b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
120426fbe8dcSKarl Rupp 
12054f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
120626fbe8dcSKarl Rupp 
1207b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1208b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
120926fbe8dcSKarl Rupp 
1210b3a17365SAmlan Barua       pdim[ndim-1] = temp;
121126fbe8dcSKarl Rupp 
1212c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1213b3a17365SAmlan Barua #endif
1214b2b77a04SHong Zhang       break;
1215b2b77a04SHong Zhang     }
1216b2b77a04SHong Zhang   }
1217b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1218b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1219b2b77a04SHong Zhang   fft->data = (void*)fftw;
1220b2b77a04SHong Zhang 
1221b2b77a04SHong Zhang   fft->n            = n;
12220c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1223e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
122426fbe8dcSKarl Rupp 
12255e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12268c1d5ab3SHong Zhang   if (size == 1) {
12278c1d5ab3SHong Zhang     /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr);  error! */
12288c1d5ab3SHong Zhang     /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */
12298c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
12308c1d5ab3SHong Zhang   }
123126fbe8dcSKarl Rupp 
1232b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1233c92418ccSAmlan Barua 
1234b2b77a04SHong Zhang   fftw->p_forward  = 0;
1235b2b77a04SHong Zhang   fftw->p_backward = 0;
1236b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1237b2b77a04SHong Zhang 
1238b2b77a04SHong Zhang   if (size == 1) {
1239b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1240b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1241b2b77a04SHong Zhang   } else {
1242b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1243b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1244b2b77a04SHong Zhang   }
1245b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1246b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12474a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
124826fbe8dcSKarl Rupp 
1249bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1250bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1251bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1252b2b77a04SHong Zhang 
1253b2b77a04SHong Zhang   /* get runtime options */
1254ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12555274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12565274ed1bSHong Zhang   if (flg) {
12575274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12585274ed1bSHong Zhang   }
12594a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1260b2b77a04SHong Zhang   PetscFunctionReturn(0);
1261b2b77a04SHong Zhang }
12623c3a9c75SAmlan Barua 
12633c3a9c75SAmlan Barua 
12643c3a9c75SAmlan Barua 
12653c3a9c75SAmlan Barua 
1266