xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 5274ed1b5914009ea1e03c499b73268a814b584e)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua    Input parameter:
303564f093SHong Zhang      A - the matrix
3197504071SAmlan Barua      x - the vector on which FDFT will be performed
3297504071SAmlan Barua 
3397504071SAmlan Barua    Output parameter:
3497504071SAmlan Barua      y - vector that stores result of FDFT
3597504071SAmlan Barua */
36b2b77a04SHong Zhang #undef __FUNCT__
37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39b2b77a04SHong Zhang {
40b2b77a04SHong Zhang   PetscErrorCode ierr;
41b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
44b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
45b2b77a04SHong Zhang 
46b2b77a04SHong Zhang   PetscFunctionBegin;
47b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
48b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
49b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
50b2b77a04SHong Zhang     switch (ndim) {
51b2b77a04SHong Zhang     case 1:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 2:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     case 3:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     default:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
75*5274ed1bSHong Zhang       /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */
7658a912c5SAmlan Barua #else
7758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7858a912c5SAmlan Barua #endif
79b2b77a04SHong Zhang       break;
80b2b77a04SHong Zhang     }
81b2b77a04SHong Zhang     fftw->finarray  = x_array;
82b2b77a04SHong Zhang     fftw->foutarray = y_array;
83b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
84b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
85b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
86b2b77a04SHong Zhang   } else { /* use existing plan */
87b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
88b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
89b2b77a04SHong Zhang     } else {
90b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
91b2b77a04SHong Zhang     }
92b2b77a04SHong Zhang   }
93b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
95b2b77a04SHong Zhang   PetscFunctionReturn(0);
96b2b77a04SHong Zhang }
97b2b77a04SHong Zhang 
9897504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
9997504071SAmlan Barua    Input parameter:
1003564f093SHong Zhang      A - the matrix
10197504071SAmlan Barua      x - the vector on which BDFT will be performed
10297504071SAmlan Barua 
10397504071SAmlan Barua    Output parameter:
10497504071SAmlan Barua      y - vector that stores result of BDFT
10597504071SAmlan Barua */
10697504071SAmlan Barua 
107b2b77a04SHong Zhang #undef __FUNCT__
108b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
109b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
110b2b77a04SHong Zhang {
111b2b77a04SHong Zhang   PetscErrorCode ierr;
112b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
113b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
114b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
115b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
116b2b77a04SHong Zhang 
117b2b77a04SHong Zhang   PetscFunctionBegin;
118b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
120b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
121b2b77a04SHong Zhang     switch (ndim) {
122b2b77a04SHong Zhang     case 1:
12358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
124b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12558a912c5SAmlan Barua #else
126e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
12758a912c5SAmlan Barua #endif
128b2b77a04SHong Zhang       break;
129b2b77a04SHong Zhang     case 2:
13058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
131b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13258a912c5SAmlan Barua #else
133e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
13458a912c5SAmlan Barua #endif
135b2b77a04SHong Zhang       break;
136b2b77a04SHong Zhang     case 3:
13758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
138b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13958a912c5SAmlan Barua #else
140e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14158a912c5SAmlan Barua #endif
142b2b77a04SHong Zhang       break;
143b2b77a04SHong Zhang     default:
14458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
145b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14658a912c5SAmlan Barua #else
147e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14858a912c5SAmlan Barua #endif
149b2b77a04SHong Zhang       break;
150b2b77a04SHong Zhang     }
151b2b77a04SHong Zhang     fftw->binarray  = x_array;
152b2b77a04SHong Zhang     fftw->boutarray = y_array;
153b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
154b2b77a04SHong Zhang   } else { /* use existing plan */
155b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
156b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
157b2b77a04SHong Zhang     } else {
158b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
159b2b77a04SHong Zhang     }
160b2b77a04SHong Zhang   }
161b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   PetscFunctionReturn(0);
164b2b77a04SHong Zhang }
165b2b77a04SHong Zhang 
16697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
16797504071SAmlan Barua    Input parameter:
1683564f093SHong Zhang      A - the matrix
16997504071SAmlan Barua      x - the vector on which FDFT will be performed
17097504071SAmlan Barua 
17197504071SAmlan Barua    Output parameter:
17297504071SAmlan Barua    y   - vector that stores result of FDFT
17397504071SAmlan Barua */
174b2b77a04SHong Zhang #undef __FUNCT__
175b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
176b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
177b2b77a04SHong Zhang {
178b2b77a04SHong Zhang   PetscErrorCode ierr;
179b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
180b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
181b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
182c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
183ce94432eSBarry Smith   MPI_Comm       comm;
184b2b77a04SHong Zhang 
185b2b77a04SHong Zhang   PetscFunctionBegin;
186ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
187b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
188b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
189b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
190b2b77a04SHong Zhang     switch (ndim) {
191b2b77a04SHong Zhang     case 1:
1923c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
193b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
194ae0a50aaSHong Zhang #else
1954f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
1963c3a9c75SAmlan Barua #endif
197b2b77a04SHong Zhang       break;
198b2b77a04SHong Zhang     case 2:
19997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
200b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2013c3a9c75SAmlan Barua #else
2023c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2033c3a9c75SAmlan Barua #endif
204b2b77a04SHong Zhang       break;
205b2b77a04SHong Zhang     case 3:
2063c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
207b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2083c3a9c75SAmlan Barua #else
20951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2103c3a9c75SAmlan Barua #endif
211b2b77a04SHong Zhang       break;
212b2b77a04SHong Zhang     default:
2133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
214c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2153c3a9c75SAmlan Barua #else
2163c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2173c3a9c75SAmlan Barua #endif
218b2b77a04SHong Zhang       break;
219b2b77a04SHong Zhang     }
220b2b77a04SHong Zhang     fftw->finarray  = x_array;
221b2b77a04SHong Zhang     fftw->foutarray = y_array;
222b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
223b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
224b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
225b2b77a04SHong Zhang   } else { /* use existing plan */
226b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
227b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
228b2b77a04SHong Zhang     } else {
229b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
230b2b77a04SHong Zhang     }
231b2b77a04SHong Zhang   }
232b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
233b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
234b2b77a04SHong Zhang   PetscFunctionReturn(0);
235b2b77a04SHong Zhang }
236b2b77a04SHong Zhang 
23797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
23897504071SAmlan Barua    Input parameter:
2393564f093SHong Zhang      A - the matrix
24097504071SAmlan Barua      x - the vector on which BDFT will be performed
24197504071SAmlan Barua 
24297504071SAmlan Barua    Output parameter:
24397504071SAmlan Barua      y - vector that stores result of BDFT
24497504071SAmlan Barua */
245b2b77a04SHong Zhang #undef __FUNCT__
246b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
247b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
248b2b77a04SHong Zhang {
249b2b77a04SHong Zhang   PetscErrorCode ierr;
250b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
251b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
252b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
253c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
254ce94432eSBarry Smith   MPI_Comm       comm;
255b2b77a04SHong Zhang 
256b2b77a04SHong Zhang   PetscFunctionBegin;
257ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
258b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
259b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
260b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
261b2b77a04SHong Zhang     switch (ndim) {
262b2b77a04SHong Zhang     case 1:
2633c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
264b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
265ae0a50aaSHong Zhang #else
2664f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2673c3a9c75SAmlan Barua #endif
268b2b77a04SHong Zhang       break;
269b2b77a04SHong Zhang     case 2:
27097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
271b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2723c3a9c75SAmlan Barua #else
2733c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2743c3a9c75SAmlan Barua #endif
275b2b77a04SHong Zhang       break;
276b2b77a04SHong Zhang     case 3:
2773c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
278b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2793c3a9c75SAmlan Barua #else
2803c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2813c3a9c75SAmlan Barua #endif
282b2b77a04SHong Zhang       break;
283b2b77a04SHong Zhang     default:
2843c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
285c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2863c3a9c75SAmlan Barua #else
2873c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2883c3a9c75SAmlan Barua #endif
289b2b77a04SHong Zhang       break;
290b2b77a04SHong Zhang     }
291b2b77a04SHong Zhang     fftw->binarray  = x_array;
292b2b77a04SHong Zhang     fftw->boutarray = y_array;
293b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
294b2b77a04SHong Zhang   } else { /* use existing plan */
295b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
296b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
297b2b77a04SHong Zhang     } else {
298b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
299b2b77a04SHong Zhang     }
300b2b77a04SHong Zhang   }
301b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
302b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
303b2b77a04SHong Zhang   PetscFunctionReturn(0);
304b2b77a04SHong Zhang }
305b2b77a04SHong Zhang 
306b2b77a04SHong Zhang #undef __FUNCT__
307b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
308b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
309b2b77a04SHong Zhang {
310b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
311b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
312b2b77a04SHong Zhang   PetscErrorCode ierr;
313b2b77a04SHong Zhang 
314b2b77a04SHong Zhang   PetscFunctionBegin;
315b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
316b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
317b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
318bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3196ccf0b3eSHong Zhang   fftw_mpi_cleanup();
320b2b77a04SHong Zhang   PetscFunctionReturn(0);
321b2b77a04SHong Zhang }
322b2b77a04SHong Zhang 
323c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
324b2b77a04SHong Zhang #undef __FUNCT__
325b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
326b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
327b2b77a04SHong Zhang {
328b2b77a04SHong Zhang   PetscErrorCode ierr;
329b2b77a04SHong Zhang   PetscScalar    *array;
330b2b77a04SHong Zhang 
331b2b77a04SHong Zhang   PetscFunctionBegin;
332b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
333b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
334b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
335b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
336b2b77a04SHong Zhang   PetscFunctionReturn(0);
337b2b77a04SHong Zhang }
338b2b77a04SHong Zhang 
3394f7415efSAmlan Barua #undef __FUNCT__
3404be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3414be45526SHong Zhang /*@
342b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3434f7415efSAmlan Barua      parallel layout determined by FFTW
3444f7415efSAmlan Barua 
3454f7415efSAmlan Barua    Collective on Mat
3464f7415efSAmlan Barua 
3474f7415efSAmlan Barua    Input Parameter:
3483564f093SHong Zhang .   A - the matrix
3494f7415efSAmlan Barua 
3504f7415efSAmlan Barua    Output Parameter:
351cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
352cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
353cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3544f7415efSAmlan Barua 
3554f7415efSAmlan Barua   Level: advanced
3563564f093SHong Zhang 
35797504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
35897504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
35997504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
36097504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
36197504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
36297504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
363e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
364e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
365e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
366e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
367e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
368e0ec536eSAmlan Barua         each processor and returns that.
3694f7415efSAmlan Barua 
3704f7415efSAmlan Barua .seealso: MatCreateFFTW()
3714be45526SHong Zhang @*/
3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3734be45526SHong Zhang {
3744be45526SHong Zhang   PetscErrorCode ierr;
375b4c3921fSHong Zhang 
3764be45526SHong Zhang   PetscFunctionBegin;
3774be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3784be45526SHong Zhang   PetscFunctionReturn(0);
3794be45526SHong Zhang }
3804be45526SHong Zhang 
3814be45526SHong Zhang #undef __FUNCT__
3824be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3834be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3844f7415efSAmlan Barua {
3854f7415efSAmlan Barua   PetscErrorCode ierr;
3864f7415efSAmlan Barua   PetscMPIInt    size,rank;
387ce94432eSBarry Smith   MPI_Comm       comm;
3884f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
3894f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3909496c9aeSAmlan Barua   PetscInt       N     = fft->N;
3914f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
3924f7415efSAmlan Barua 
3934f7415efSAmlan Barua   PetscFunctionBegin;
3944f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3954f7415efSAmlan Barua   PetscValidType(A,1);
396ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
3974f7415efSAmlan Barua 
3984f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
3994f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4004f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4024f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4034f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4044f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4054f7415efSAmlan Barua #else
4068ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4078ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4088ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4094f7415efSAmlan Barua #endif
41097504071SAmlan Barua   } else { /* parallel cases */
4114f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4129496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4139496c9aeSAmlan Barua     fftw_complex *data_fout;
4149496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4169496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4179496c9aeSAmlan Barua #else
4184f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4196ccf0b3eSHong Zhang     PetscInt  n1,N1;
4209496c9aeSAmlan Barua     ptrdiff_t temp;
4219496c9aeSAmlan Barua #endif
4229496c9aeSAmlan Barua 
4234f7415efSAmlan Barua     switch (ndim) {
4244f7415efSAmlan Barua     case 1:
42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4264f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42757625b84SAmlan Barua #else
42857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42957625b84SAmlan Barua       if (fin) {
43057625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
431778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
43226fbe8dcSKarl Rupp 
43357625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43457625b84SAmlan Barua       }
43557625b84SAmlan Barua       if (fout) {
43657625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
437778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
43826fbe8dcSKarl Rupp 
43957625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
44057625b84SAmlan Barua       }
44157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44257625b84SAmlan Barua       if (bout) {
44357625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
444778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
44526fbe8dcSKarl Rupp 
44657625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44757625b84SAmlan Barua       }
44857625b84SAmlan Barua       break;
44957625b84SAmlan Barua #endif
4504f7415efSAmlan Barua     case 2:
45197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4524f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4534f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4544f7415efSAmlan Barua       if (fin) {
4554f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
456778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
45726fbe8dcSKarl Rupp 
4584f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4594f7415efSAmlan Barua       }
4604f7415efSAmlan Barua       if (bout) {
4614f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
462778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
46326fbe8dcSKarl Rupp 
4644f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4654f7415efSAmlan Barua       }
46657625b84SAmlan Barua       if (fout) {
46757625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
468778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46926fbe8dcSKarl Rupp 
47057625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
47157625b84SAmlan Barua       }
4724f7415efSAmlan Barua #else
4734f7415efSAmlan Barua       /* Get local size */
4744f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4754f7415efSAmlan Barua       if (fin) {
4764f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
477778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
47826fbe8dcSKarl Rupp 
4794f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua       }
4814f7415efSAmlan Barua       if (fout) {
4824f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48426fbe8dcSKarl Rupp 
4854f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
4864f7415efSAmlan Barua       }
4874f7415efSAmlan Barua       if (bout) {
4884f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
489778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
49026fbe8dcSKarl Rupp 
4914f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4924f7415efSAmlan Barua       }
4934f7415efSAmlan Barua #endif
4944f7415efSAmlan Barua       break;
4954f7415efSAmlan Barua     case 3:
4964f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4974f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4984f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4994f7415efSAmlan Barua       if (fin) {
5004f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
501778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
50226fbe8dcSKarl Rupp 
5034f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5044f7415efSAmlan Barua       }
5054f7415efSAmlan Barua       if (bout) {
5064f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
507778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
50826fbe8dcSKarl Rupp 
5094f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5104f7415efSAmlan Barua       }
5113564f093SHong Zhang 
51257625b84SAmlan Barua       if (fout) {
51357625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
514778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51526fbe8dcSKarl Rupp 
51657625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
51757625b84SAmlan Barua       }
5184f7415efSAmlan Barua #else
5190c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5200c9b18e4SAmlan Barua       if (fin) {
5210c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
522778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
52326fbe8dcSKarl Rupp 
5240c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5250c9b18e4SAmlan Barua       }
5260c9b18e4SAmlan Barua       if (fout) {
5270c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52926fbe8dcSKarl Rupp 
5300c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5310c9b18e4SAmlan Barua       }
5320c9b18e4SAmlan Barua       if (bout) {
5330c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
534778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
53526fbe8dcSKarl Rupp 
5360c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5370c9b18e4SAmlan Barua       }
5384f7415efSAmlan Barua #endif
5394f7415efSAmlan Barua       break;
5404f7415efSAmlan Barua     default:
5414f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5424f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
54326fbe8dcSKarl Rupp 
5444f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
54526fbe8dcSKarl Rupp 
5464f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5474f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
54826fbe8dcSKarl Rupp 
5494f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5504f7415efSAmlan Barua 
5514f7415efSAmlan Barua       if (fin) {
5524f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
553778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
55426fbe8dcSKarl Rupp 
5554f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5564f7415efSAmlan Barua       }
5574f7415efSAmlan Barua       if (bout) {
5584f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
559778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
56026fbe8dcSKarl Rupp 
5619496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5624f7415efSAmlan Barua       }
5633564f093SHong Zhang 
56457625b84SAmlan Barua       if (fout) {
56557625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
566778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
56726fbe8dcSKarl Rupp 
56857625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
56957625b84SAmlan Barua       }
5704f7415efSAmlan Barua #else
5710c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5720c9b18e4SAmlan Barua       if (fin) {
5730c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
574778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
57526fbe8dcSKarl Rupp 
5760c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5770c9b18e4SAmlan Barua       }
5780c9b18e4SAmlan Barua       if (fout) {
5790c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
580778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
58126fbe8dcSKarl Rupp 
5820c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5830c9b18e4SAmlan Barua       }
5840c9b18e4SAmlan Barua       if (bout) {
5850c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
58726fbe8dcSKarl Rupp 
5880c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5890c9b18e4SAmlan Barua       }
5904f7415efSAmlan Barua #endif
5914f7415efSAmlan Barua       break;
5924f7415efSAmlan Barua     }
5934f7415efSAmlan Barua   }
5944f7415efSAmlan Barua   PetscFunctionReturn(0);
5954f7415efSAmlan Barua }
5964f7415efSAmlan Barua 
597c92418ccSAmlan Barua #undef __FUNCT__
59874a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
5993564f093SHong Zhang /*@
6003564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
60154efbe56SHong Zhang 
6023564f093SHong Zhang    Collective on Mat
6033564f093SHong Zhang 
6043564f093SHong Zhang    Input Parameters:
6053564f093SHong Zhang +  A - FFTW matrix
6063564f093SHong Zhang -  x - the PETSc vector
6073564f093SHong Zhang 
6083564f093SHong Zhang    Output Parameters:
6093564f093SHong Zhang .  y - the FFTW vector
6103564f093SHong Zhang 
611b2b77a04SHong Zhang   Options Database Keys:
6123564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
613b2b77a04SHong Zhang 
614b2b77a04SHong Zhang    Level: intermediate
6153564f093SHong Zhang 
61697504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
61797504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
61897504071SAmlan Barua          zeros. This routine does that job by scattering operation.
61997504071SAmlan Barua 
6203564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6213564f093SHong Zhang @*/
6223564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6233564f093SHong Zhang {
6243564f093SHong Zhang   PetscErrorCode ierr;
625b2b77a04SHong Zhang 
6263564f093SHong Zhang   PetscFunctionBegin;
6273564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6283564f093SHong Zhang   PetscFunctionReturn(0);
6293564f093SHong Zhang }
6303c3a9c75SAmlan Barua 
6313c3a9c75SAmlan Barua #undef __FUNCT__
6321986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
63374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6343c3a9c75SAmlan Barua {
6353c3a9c75SAmlan Barua   PetscErrorCode ierr;
636ce94432eSBarry Smith   MPI_Comm       comm;
6373c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6383c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6399496c9aeSAmlan Barua   PetscInt       N     =fft->N;
640b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6419496c9aeSAmlan Barua   PetscInt       low;
6428ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6433c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6449496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6453c3a9c75SAmlan Barua   VecScatter     vecscat;
6463c3a9c75SAmlan Barua   IS             list1,list2;
6479496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6489496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6499496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6509496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6519496c9aeSAmlan Barua   ptrdiff_t      temp;
6529496c9aeSAmlan Barua #endif
6533c3a9c75SAmlan Barua 
6543564f093SHong Zhang   PetscFunctionBegin;
655ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
656f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
657f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6580298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
6593c3a9c75SAmlan Barua 
6603564f093SHong Zhang   if (size==1) {
6618ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6628ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6639496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6646971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6656971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6666971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6676971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
668b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
6693564f093SHong Zhang   } else {
6703c3a9c75SAmlan Barua     switch (ndim) {
6713c3a9c75SAmlan Barua     case 1:
67264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67364657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
67426fbe8dcSKarl Rupp 
6754f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
6764f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
67764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
67864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
67964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
68064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
68164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
68264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
68364657d84SAmlan Barua #else
684e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
68564657d84SAmlan Barua #endif
6863c3a9c75SAmlan Barua       break;
6873c3a9c75SAmlan Barua     case 2:
688bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
689bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
69026fbe8dcSKarl Rupp 
6914f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
6924f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
693bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
694bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
697bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
698bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
699bd59e6c6SAmlan Barua #else
7003c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
70126fbe8dcSKarl Rupp 
7023c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7039496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7049496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7053c3a9c75SAmlan Barua 
7063564f093SHong Zhang       if (dim[1]%2==0) {
7073c3a9c75SAmlan Barua         NM = dim[1]+2;
7083564f093SHong Zhang       } else {
7093c3a9c75SAmlan Barua         NM = dim[1]+1;
7103564f093SHong Zhang       }
7113c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7123c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7133c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7143c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
71526fbe8dcSKarl Rupp 
7165b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7173c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7183c3a9c75SAmlan Barua         }
7193c3a9c75SAmlan Barua       }
7203c3a9c75SAmlan Barua 
7213c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7223c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7233c3a9c75SAmlan Barua 
724f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
725f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
728b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
729b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
730b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
731b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
732bd59e6c6SAmlan Barua #endif
7339496c9aeSAmlan Barua       break;
7343c3a9c75SAmlan Barua 
7353c3a9c75SAmlan Barua     case 3:
736bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
737bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
73826fbe8dcSKarl Rupp 
7394f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7404f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
741bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
742bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
743bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
745bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
746bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
747bd59e6c6SAmlan Barua #else
74851d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
74926fbe8dcSKarl Rupp 
75051d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
75151d1eed7SAmlan Barua 
7529496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7539496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
75451d1eed7SAmlan Barua 
755db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
756db4deed7SKarl Rupp       else             NM = dim[2]+1;
75751d1eed7SAmlan Barua 
75851d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
75951d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
76051d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
76151d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
76251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
76326fbe8dcSKarl Rupp 
76451d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
76551d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
76651d1eed7SAmlan Barua           }
76751d1eed7SAmlan Barua         }
76851d1eed7SAmlan Barua       }
76951d1eed7SAmlan Barua 
77051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
77151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
77251d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
77351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
776b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
777b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
778b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
779b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
780bd59e6c6SAmlan Barua #endif
7819496c9aeSAmlan Barua       break;
7823c3a9c75SAmlan Barua 
7833c3a9c75SAmlan Barua     default:
784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
785bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
78626fbe8dcSKarl Rupp 
7874f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
7884f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
789bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
790bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
791bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
793bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
794bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
795bd59e6c6SAmlan Barua #else
796e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
79726fbe8dcSKarl Rupp 
798e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
79926fbe8dcSKarl Rupp 
800e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
80126fbe8dcSKarl Rupp 
802e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
80326fbe8dcSKarl Rupp 
804e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
805e5d7f247SAmlan Barua 
806e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
807e5d7f247SAmlan Barua 
808e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
809e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
810e5d7f247SAmlan Barua 
811db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
812db4deed7SKarl Rupp       else                  NM = 1;
813e5d7f247SAmlan Barua 
8146971673cSAmlan Barua       j = low;
8153564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8166971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8176971673cSAmlan Barua         indx2[i] = j;
81826fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8196971673cSAmlan Barua         j++;
8206971673cSAmlan Barua       }
8216971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8226971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8236971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8246971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8256971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8266971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
827b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
828b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
829b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
830b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
831bd59e6c6SAmlan Barua #endif
8329496c9aeSAmlan Barua       break;
8333c3a9c75SAmlan Barua     }
834e81bb599SAmlan Barua   }
8353564f093SHong Zhang   PetscFunctionReturn(0);
8363c3a9c75SAmlan Barua }
8373c3a9c75SAmlan Barua 
8383c3a9c75SAmlan Barua #undef __FUNCT__
83974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8403564f093SHong Zhang /*@
8413564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8423564f093SHong Zhang 
8433564f093SHong Zhang    Collective on Mat
8443564f093SHong Zhang 
8453564f093SHong Zhang     Input Parameters:
8463564f093SHong Zhang +   A - FFTW matrix
8473564f093SHong Zhang -   x - FFTW vector
8483564f093SHong Zhang 
8493564f093SHong Zhang    Output Parameters:
8503564f093SHong Zhang .  y - PETSc vector
8513564f093SHong Zhang 
8523564f093SHong Zhang    Level: intermediate
8533564f093SHong Zhang 
8543564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8553564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8563564f093SHong Zhang 
8573564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8583564f093SHong Zhang @*/
85974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8603c3a9c75SAmlan Barua {
8613c3a9c75SAmlan Barua   PetscErrorCode ierr;
8625fd66863SKarl Rupp 
8633c3a9c75SAmlan Barua   PetscFunctionBegin;
86474a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8653c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8663c3a9c75SAmlan Barua }
86754efbe56SHong Zhang 
8683c3a9c75SAmlan Barua #undef __FUNCT__
86974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
87074a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8715b3e41ffSAmlan Barua {
8725b3e41ffSAmlan Barua   PetscErrorCode ierr;
873ce94432eSBarry Smith   MPI_Comm       comm;
8745b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
8755b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8769496c9aeSAmlan Barua   PetscInt       N     = fft->N;
877b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
8789496c9aeSAmlan Barua   PetscInt       low;
8799496c9aeSAmlan Barua   PetscInt       rank,size;
8805b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8819496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8825b3e41ffSAmlan Barua   VecScatter     vecscat;
8835b3e41ffSAmlan Barua   IS             list1,list2;
8849496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8859496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
8869496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
8879496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
8889496c9aeSAmlan Barua   ptrdiff_t temp;
8899496c9aeSAmlan Barua #endif
8909496c9aeSAmlan Barua 
8913564f093SHong Zhang   PetscFunctionBegin;
892ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
8935b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8945b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8950298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
8965b3e41ffSAmlan Barua 
897e81bb599SAmlan Barua   if (size==1) {
898b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
8996971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9006971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9016971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9026971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
903b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
904e81bb599SAmlan Barua 
9053564f093SHong Zhang   } else {
906e81bb599SAmlan Barua     switch (ndim) {
907e81bb599SAmlan Barua     case 1:
90864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
90964657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
91026fbe8dcSKarl Rupp 
9114f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9124f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
91364657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
91464657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
91564657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
91664657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
91764657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
91864657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
91964657d84SAmlan Barua #else
9206a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
92164657d84SAmlan Barua #endif
9225b3e41ffSAmlan Barua       break;
9235b3e41ffSAmlan Barua     case 2:
924bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
925bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
92626fbe8dcSKarl Rupp 
9274f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9284f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
929bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
930bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
933bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
934bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
935bd59e6c6SAmlan Barua #else
9365b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
93726fbe8dcSKarl Rupp 
9385b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9395b3e41ffSAmlan Barua 
9409496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9419496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9425b3e41ffSAmlan Barua 
943db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
944db4deed7SKarl Rupp       else             NM = dim[1]+1;
9455b3e41ffSAmlan Barua 
9465b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9475b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9485b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9495b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
95026fbe8dcSKarl Rupp 
9515b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9525b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9535b3e41ffSAmlan Barua         }
9545b3e41ffSAmlan Barua       }
9555b3e41ffSAmlan Barua 
9565b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9575b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9585b3e41ffSAmlan Barua 
9595b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9605b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9615b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9625b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
963b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
964b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
965b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
966b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
967bd59e6c6SAmlan Barua #endif
9689496c9aeSAmlan Barua       break;
9695b3e41ffSAmlan Barua     case 3:
970bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
971bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
97226fbe8dcSKarl Rupp 
9734f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
9744f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
975bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
976bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
979bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
980bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
981bd59e6c6SAmlan Barua #else
982bd59e6c6SAmlan Barua 
98351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
98426fbe8dcSKarl Rupp 
98551d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
98651d1eed7SAmlan Barua 
9879496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9889496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
98951d1eed7SAmlan Barua 
990db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
991db4deed7SKarl Rupp       else             NM = dim[2]+1;
99251d1eed7SAmlan Barua 
99351d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
99451d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
99551d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
99651d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
99751d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
99826fbe8dcSKarl Rupp 
99951d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
100051d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
100151d1eed7SAmlan Barua           }
100251d1eed7SAmlan Barua         }
100351d1eed7SAmlan Barua       }
100451d1eed7SAmlan Barua 
100551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
100651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
100751d1eed7SAmlan Barua 
100851d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
100951d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101051d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101151d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1012b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1013b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1014b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1015b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1016bd59e6c6SAmlan Barua #endif
10179496c9aeSAmlan Barua       break;
10185b3e41ffSAmlan Barua     default:
1019bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1020bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
102126fbe8dcSKarl Rupp 
10224f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10234f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1024bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1025bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua #else
1031ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
103226fbe8dcSKarl Rupp 
1033ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
103426fbe8dcSKarl Rupp 
1035ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
103626fbe8dcSKarl Rupp 
1037ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
103826fbe8dcSKarl Rupp 
1039ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1040ba6e06d0SAmlan Barua 
1041ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1042ba6e06d0SAmlan Barua 
1043ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1044ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1045ba6e06d0SAmlan Barua 
1046db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1047db4deed7SKarl Rupp       else                  NM = 1;
1048ba6e06d0SAmlan Barua 
1049ba6e06d0SAmlan Barua       j = low;
10503564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1051ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1052ba6e06d0SAmlan Barua         indx2[i] = j;
10533564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1054ba6e06d0SAmlan Barua         j++;
1055ba6e06d0SAmlan Barua       }
1056ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1057ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1058ba6e06d0SAmlan Barua 
1059ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1060ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1063b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1064b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1065b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1066b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1067bd59e6c6SAmlan Barua #endif
10689496c9aeSAmlan Barua       break;
10695b3e41ffSAmlan Barua     }
1070e81bb599SAmlan Barua   }
10713564f093SHong Zhang   PetscFunctionReturn(0);
10725b3e41ffSAmlan Barua }
10735b3e41ffSAmlan Barua 
10745b3e41ffSAmlan Barua #undef __FUNCT__
10753c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10763c3a9c75SAmlan Barua /*
10773564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
10783564f093SHong Zhang 
10793c3a9c75SAmlan Barua   Options Database Keys:
10803c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10813c3a9c75SAmlan Barua 
10823c3a9c75SAmlan Barua    Level: intermediate
10833c3a9c75SAmlan Barua 
10843c3a9c75SAmlan Barua */
10858cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1086b2b77a04SHong Zhang {
1087b2b77a04SHong Zhang   PetscErrorCode ierr;
1088ce94432eSBarry Smith   MPI_Comm       comm;
1089b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1090b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1091b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1092*5274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1093*5274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1094b2b77a04SHong Zhang   PetscBool      flg;
10954f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
109611902ff2SHong Zhang   PetscMPIInt    size,rank;
10979496c9aeSAmlan Barua   ptrdiff_t      *pdim;
10989496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10999496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11009496c9aeSAmlan Barua   ptrdiff_t temp;
11018ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11024f48bc67SAmlan Barua #else
11034f48bc67SAmlan Barua   PetscInt n1;
11049496c9aeSAmlan Barua #endif
11059496c9aeSAmlan Barua 
1106b2b77a04SHong Zhang   PetscFunctionBegin;
1107ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1108b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
110911902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1110c92418ccSAmlan Barua 
11111878d478SAmlan Barua   fftw_mpi_init();
111211902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
111311902ff2SHong Zhang   pdim[0] = dim[0];
11148ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11158ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11168ca4c034SAmlan Barua #endif
11173564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11186882372aSHong Zhang     partial_dim *= dim[ctr];
111911902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11208ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1121db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1122db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11238ca4c034SAmlan Barua #endif
11246882372aSHong Zhang   }
11253c3a9c75SAmlan Barua 
1126b2b77a04SHong Zhang   if (size == 1) {
1127e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1128b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1129b2b77a04SHong Zhang     n    = N;
1130e81bb599SAmlan Barua #else
1131e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1132e81bb599SAmlan Barua     n    = tot_dim;
1133e81bb599SAmlan Barua #endif
1134e81bb599SAmlan Barua 
1135b2b77a04SHong Zhang   } else {
11361abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1137b2b77a04SHong Zhang     switch (ndim) {
1138b2b77a04SHong Zhang     case 1:
11393c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11403c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1141e5d7f247SAmlan Barua #else
11429496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
114326fbe8dcSKarl Rupp 
11446882372aSHong Zhang       n    = (PetscInt)local_n0;
11459496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11469496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1147e5d7f247SAmlan Barua #endif
1148b2b77a04SHong Zhang       break;
1149b2b77a04SHong Zhang     case 2:
11505b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1151b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1152b2b77a04SHong Zhang       /*
1153b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11540ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1155b2b77a04SHong Zhang        */
1156b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1157b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11585b3e41ffSAmlan Barua #else
11594f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
116026fbe8dcSKarl Rupp 
11615b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1162c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11635b3e41ffSAmlan Barua #endif
1164b2b77a04SHong Zhang       break;
1165b2b77a04SHong Zhang     case 3:
116651d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
116751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
116826fbe8dcSKarl Rupp 
11696882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11706882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
117151d1eed7SAmlan Barua #else
11724f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
117326fbe8dcSKarl Rupp 
117451d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1175c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
117651d1eed7SAmlan Barua #endif
1177b2b77a04SHong Zhang       break;
1178b2b77a04SHong Zhang     default:
1179b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
118011902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
118126fbe8dcSKarl Rupp 
11826882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
11836882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1184b3a17365SAmlan Barua #else
1185b3a17365SAmlan Barua       temp = pdim[ndim-1];
118626fbe8dcSKarl Rupp 
1187b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
118826fbe8dcSKarl Rupp 
11894f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
119026fbe8dcSKarl Rupp 
1191b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1192b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
119326fbe8dcSKarl Rupp 
1194b3a17365SAmlan Barua       pdim[ndim-1] = temp;
119526fbe8dcSKarl Rupp 
1196c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1197b3a17365SAmlan Barua #endif
1198b2b77a04SHong Zhang       break;
1199b2b77a04SHong Zhang     }
1200b2b77a04SHong Zhang   }
1201b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1202b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1203b2b77a04SHong Zhang   fft->data = (void*)fftw;
1204b2b77a04SHong Zhang 
1205b2b77a04SHong Zhang   fft->n            = n;
12060c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1207e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
120826fbe8dcSKarl Rupp 
12095e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
121026fbe8dcSKarl Rupp 
1211b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1212c92418ccSAmlan Barua 
1213b2b77a04SHong Zhang   fftw->p_forward  = 0;
1214b2b77a04SHong Zhang   fftw->p_backward = 0;
1215b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1216b2b77a04SHong Zhang 
1217b2b77a04SHong Zhang   if (size == 1) {
1218b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1219b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1220b2b77a04SHong Zhang   } else {
1221b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1222b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1223b2b77a04SHong Zhang   }
1224b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1225b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12264a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
122726fbe8dcSKarl Rupp 
1228bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1229bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1230bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1231b2b77a04SHong Zhang 
1232b2b77a04SHong Zhang   /* get runtime options */
1233ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1234*5274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1235*5274ed1bSHong Zhang   if (flg) {
1236*5274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
1237*5274ed1bSHong Zhang   }
12384a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1239b2b77a04SHong Zhang   PetscFunctionReturn(0);
1240b2b77a04SHong Zhang }
12413c3a9c75SAmlan Barua 
12423c3a9c75SAmlan Barua 
12433c3a9c75SAmlan Barua 
12443c3a9c75SAmlan Barua 
1245