xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua    Input parameter:
303564f093SHong Zhang      A - the matrix
3197504071SAmlan Barua      x - the vector on which FDFT will be performed
3297504071SAmlan Barua 
3397504071SAmlan Barua    Output parameter:
3497504071SAmlan Barua      y - vector that stores result of FDFT
3597504071SAmlan Barua */
36b2b77a04SHong Zhang #undef __FUNCT__
37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39b2b77a04SHong Zhang {
40b2b77a04SHong Zhang   PetscErrorCode ierr;
41b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
44b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
45b2b77a04SHong Zhang 
46b2b77a04SHong Zhang   PetscFunctionBegin;
47b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
48b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
49b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
50b2b77a04SHong Zhang     switch (ndim) {
51b2b77a04SHong Zhang     case 1:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 2:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     case 3:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     default:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     }
80b2b77a04SHong Zhang     fftw->finarray  = x_array;
81b2b77a04SHong Zhang     fftw->foutarray = y_array;
82b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
83b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
84b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
85b2b77a04SHong Zhang   } else { /* use existing plan */
86b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
87b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
88b2b77a04SHong Zhang     } else {
89b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
90b2b77a04SHong Zhang     }
91b2b77a04SHong Zhang   }
92b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   PetscFunctionReturn(0);
95b2b77a04SHong Zhang }
96b2b77a04SHong Zhang 
9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
9897504071SAmlan Barua    Input parameter:
993564f093SHong Zhang      A - the matrix
10097504071SAmlan Barua      x - the vector on which BDFT will be performed
10197504071SAmlan Barua 
10297504071SAmlan Barua    Output parameter:
10397504071SAmlan Barua      y - vector that stores result of BDFT
10497504071SAmlan Barua */
10597504071SAmlan Barua 
106b2b77a04SHong Zhang #undef __FUNCT__
107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
109b2b77a04SHong Zhang {
110b2b77a04SHong Zhang   PetscErrorCode ierr;
111b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
112b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
113b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
114b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
115b2b77a04SHong Zhang 
116b2b77a04SHong Zhang   PetscFunctionBegin;
117b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
118b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
120b2b77a04SHong Zhang     switch (ndim) {
121b2b77a04SHong Zhang     case 1:
12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12458a912c5SAmlan Barua #else
125e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
12658a912c5SAmlan Barua #endif
127b2b77a04SHong Zhang       break;
128b2b77a04SHong Zhang     case 2:
12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
130b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13158a912c5SAmlan Barua #else
132e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
13358a912c5SAmlan Barua #endif
134b2b77a04SHong Zhang       break;
135b2b77a04SHong Zhang     case 3:
13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
137b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13858a912c5SAmlan Barua #else
139e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14058a912c5SAmlan Barua #endif
141b2b77a04SHong Zhang       break;
142b2b77a04SHong Zhang     default:
14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
144b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14558a912c5SAmlan Barua #else
146e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14758a912c5SAmlan Barua #endif
148b2b77a04SHong Zhang       break;
149b2b77a04SHong Zhang     }
150b2b77a04SHong Zhang     fftw->binarray  = x_array;
151b2b77a04SHong Zhang     fftw->boutarray = y_array;
152b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
153b2b77a04SHong Zhang   } else { /* use existing plan */
154b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
155b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
156b2b77a04SHong Zhang     } else {
157b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
158b2b77a04SHong Zhang     }
159b2b77a04SHong Zhang   }
160b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
161b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   PetscFunctionReturn(0);
163b2b77a04SHong Zhang }
164b2b77a04SHong Zhang 
16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
16697504071SAmlan Barua    Input parameter:
1673564f093SHong Zhang      A - the matrix
16897504071SAmlan Barua      x - the vector on which FDFT will be performed
16997504071SAmlan Barua 
17097504071SAmlan Barua    Output parameter:
17197504071SAmlan Barua    y   - vector that stores result of FDFT
17297504071SAmlan Barua */
173b2b77a04SHong Zhang #undef __FUNCT__
174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
176b2b77a04SHong Zhang {
177b2b77a04SHong Zhang   PetscErrorCode ierr;
178b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
179b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
180b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
181c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
182*ce94432eSBarry Smith   MPI_Comm       comm;
183b2b77a04SHong Zhang 
184b2b77a04SHong Zhang   PetscFunctionBegin;
185*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
186b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
187b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
188b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
189b2b77a04SHong Zhang     switch (ndim) {
190b2b77a04SHong Zhang     case 1:
1913c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
192b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
193ae0a50aaSHong Zhang #else
1944f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
1953c3a9c75SAmlan Barua #endif
196b2b77a04SHong Zhang       break;
197b2b77a04SHong Zhang     case 2:
19897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
199b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2003c3a9c75SAmlan Barua #else
2013c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2023c3a9c75SAmlan Barua #endif
203b2b77a04SHong Zhang       break;
204b2b77a04SHong Zhang     case 3:
2053c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
206b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2073c3a9c75SAmlan Barua #else
20851d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2093c3a9c75SAmlan Barua #endif
210b2b77a04SHong Zhang       break;
211b2b77a04SHong Zhang     default:
2123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
213c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2143c3a9c75SAmlan Barua #else
2153c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2163c3a9c75SAmlan Barua #endif
217b2b77a04SHong Zhang       break;
218b2b77a04SHong Zhang     }
219b2b77a04SHong Zhang     fftw->finarray  = x_array;
220b2b77a04SHong Zhang     fftw->foutarray = y_array;
221b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
222b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
223b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
224b2b77a04SHong Zhang   } else { /* use existing plan */
225b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
226b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
227b2b77a04SHong Zhang     } else {
228b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
229b2b77a04SHong Zhang     }
230b2b77a04SHong Zhang   }
231b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
232b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
233b2b77a04SHong Zhang   PetscFunctionReturn(0);
234b2b77a04SHong Zhang }
235b2b77a04SHong Zhang 
23697504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
23797504071SAmlan Barua    Input parameter:
2383564f093SHong Zhang      A - the matrix
23997504071SAmlan Barua      x - the vector on which BDFT will be performed
24097504071SAmlan Barua 
24197504071SAmlan Barua    Output parameter:
24297504071SAmlan Barua      y - vector that stores result of BDFT
24397504071SAmlan Barua */
244b2b77a04SHong Zhang #undef __FUNCT__
245b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
246b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
247b2b77a04SHong Zhang {
248b2b77a04SHong Zhang   PetscErrorCode ierr;
249b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
250b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
251b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
252c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
253*ce94432eSBarry Smith   MPI_Comm       comm;
254b2b77a04SHong Zhang 
255b2b77a04SHong Zhang   PetscFunctionBegin;
256*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
257b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
258b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
259b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
260b2b77a04SHong Zhang     switch (ndim) {
261b2b77a04SHong Zhang     case 1:
2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
263b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
264ae0a50aaSHong Zhang #else
2654f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2663c3a9c75SAmlan Barua #endif
267b2b77a04SHong Zhang       break;
268b2b77a04SHong Zhang     case 2:
26997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
270b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2713c3a9c75SAmlan Barua #else
2723c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     case 3:
2763c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
277b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2783c3a9c75SAmlan Barua #else
2793c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2803c3a9c75SAmlan Barua #endif
281b2b77a04SHong Zhang       break;
282b2b77a04SHong Zhang     default:
2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
284c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2853c3a9c75SAmlan Barua #else
2863c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2873c3a9c75SAmlan Barua #endif
288b2b77a04SHong Zhang       break;
289b2b77a04SHong Zhang     }
290b2b77a04SHong Zhang     fftw->binarray  = x_array;
291b2b77a04SHong Zhang     fftw->boutarray = y_array;
292b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
293b2b77a04SHong Zhang   } else { /* use existing plan */
294b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
295b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
296b2b77a04SHong Zhang     } else {
297b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
298b2b77a04SHong Zhang     }
299b2b77a04SHong Zhang   }
300b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
301b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
302b2b77a04SHong Zhang   PetscFunctionReturn(0);
303b2b77a04SHong Zhang }
304b2b77a04SHong Zhang 
305b2b77a04SHong Zhang #undef __FUNCT__
306b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
307b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
308b2b77a04SHong Zhang {
309b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
310b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
311b2b77a04SHong Zhang   PetscErrorCode ierr;
312b2b77a04SHong Zhang 
313b2b77a04SHong Zhang   PetscFunctionBegin;
314b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
315b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
316b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
317bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3186ccf0b3eSHong Zhang   fftw_mpi_cleanup();
319b2b77a04SHong Zhang   PetscFunctionReturn(0);
320b2b77a04SHong Zhang }
321b2b77a04SHong Zhang 
322c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
323b2b77a04SHong Zhang #undef __FUNCT__
324b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
325b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
326b2b77a04SHong Zhang {
327b2b77a04SHong Zhang   PetscErrorCode ierr;
328b2b77a04SHong Zhang   PetscScalar    *array;
329b2b77a04SHong Zhang 
330b2b77a04SHong Zhang   PetscFunctionBegin;
331b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
332b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
333b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
334b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
335b2b77a04SHong Zhang   PetscFunctionReturn(0);
336b2b77a04SHong Zhang }
337b2b77a04SHong Zhang 
3384f7415efSAmlan Barua #undef __FUNCT__
3394be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3404be45526SHong Zhang /*@
341b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3424f7415efSAmlan Barua      parallel layout determined by FFTW
3434f7415efSAmlan Barua 
3444f7415efSAmlan Barua    Collective on Mat
3454f7415efSAmlan Barua 
3464f7415efSAmlan Barua    Input Parameter:
3473564f093SHong Zhang .   A - the matrix
3484f7415efSAmlan Barua 
3494f7415efSAmlan Barua    Output Parameter:
350cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
351cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
352cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3534f7415efSAmlan Barua 
3544f7415efSAmlan Barua   Level: advanced
3553564f093SHong Zhang 
35697504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
35797504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
35897504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
35997504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
36097504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
36197504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
362e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
363e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
364e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
365e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
366e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
367e0ec536eSAmlan Barua         each processor and returns that.
3684f7415efSAmlan Barua 
3694f7415efSAmlan Barua .seealso: MatCreateFFTW()
3704be45526SHong Zhang @*/
3714be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3724be45526SHong Zhang {
3734be45526SHong Zhang   PetscErrorCode ierr;
374b4c3921fSHong Zhang 
3754be45526SHong Zhang   PetscFunctionBegin;
3764be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3774be45526SHong Zhang   PetscFunctionReturn(0);
3784be45526SHong Zhang }
3794be45526SHong Zhang 
3804f7415efSAmlan Barua EXTERN_C_BEGIN
3814be45526SHong Zhang #undef __FUNCT__
3824be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3834be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3844f7415efSAmlan Barua {
3854f7415efSAmlan Barua   PetscErrorCode ierr;
3864f7415efSAmlan Barua   PetscMPIInt    size,rank;
387*ce94432eSBarry Smith   MPI_Comm       comm;
3884f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
3894f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3909496c9aeSAmlan Barua   PetscInt       N     = fft->N;
3914f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
3924f7415efSAmlan Barua 
3934f7415efSAmlan Barua   PetscFunctionBegin;
3944f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3954f7415efSAmlan Barua   PetscValidType(A,1);
396*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
3974f7415efSAmlan Barua 
3984f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
3994f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4004f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4024f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4034f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4044f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4054f7415efSAmlan Barua #else
4068ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4078ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4088ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4094f7415efSAmlan Barua #endif
41097504071SAmlan Barua   } else { /* parallel cases */
4114f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4129496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4139496c9aeSAmlan Barua     fftw_complex *data_fout;
4149496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4169496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4179496c9aeSAmlan Barua #else
4184f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4196ccf0b3eSHong Zhang     PetscInt  n1,N1;
4209496c9aeSAmlan Barua     ptrdiff_t temp;
4219496c9aeSAmlan Barua #endif
4229496c9aeSAmlan Barua 
4234f7415efSAmlan Barua     switch (ndim) {
4244f7415efSAmlan Barua     case 1:
42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4264f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42757625b84SAmlan Barua #else
42857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42957625b84SAmlan Barua       if (fin) {
43057625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
431778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
43226fbe8dcSKarl Rupp 
43357625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43457625b84SAmlan Barua       }
43557625b84SAmlan Barua       if (fout) {
43657625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
437778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
43826fbe8dcSKarl Rupp 
43957625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
44057625b84SAmlan Barua       }
44157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44257625b84SAmlan Barua       if (bout) {
44357625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
444778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
44526fbe8dcSKarl Rupp 
44657625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44757625b84SAmlan Barua       }
44857625b84SAmlan Barua       break;
44957625b84SAmlan Barua #endif
4504f7415efSAmlan Barua     case 2:
45197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4524f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4534f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4544f7415efSAmlan Barua       if (fin) {
4554f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
456778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
45726fbe8dcSKarl Rupp 
4584f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4594f7415efSAmlan Barua       }
4604f7415efSAmlan Barua       if (bout) {
4614f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
462778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
46326fbe8dcSKarl Rupp 
4644f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4654f7415efSAmlan Barua       }
46657625b84SAmlan Barua       if (fout) {
46757625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
468778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46926fbe8dcSKarl Rupp 
47057625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
47157625b84SAmlan Barua       }
4724f7415efSAmlan Barua #else
4734f7415efSAmlan Barua       /* Get local size */
4744f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4754f7415efSAmlan Barua       if (fin) {
4764f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
477778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
47826fbe8dcSKarl Rupp 
4794f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua       }
4814f7415efSAmlan Barua       if (fout) {
4824f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48426fbe8dcSKarl Rupp 
4854f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
4864f7415efSAmlan Barua       }
4874f7415efSAmlan Barua       if (bout) {
4884f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
489778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
49026fbe8dcSKarl Rupp 
4914f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4924f7415efSAmlan Barua       }
4934f7415efSAmlan Barua #endif
4944f7415efSAmlan Barua       break;
4954f7415efSAmlan Barua     case 3:
4964f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4974f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4984f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4994f7415efSAmlan Barua       if (fin) {
5004f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
501778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
50226fbe8dcSKarl Rupp 
5034f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5044f7415efSAmlan Barua       }
5054f7415efSAmlan Barua       if (bout) {
5064f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
507778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
50826fbe8dcSKarl Rupp 
5094f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5104f7415efSAmlan Barua       }
5113564f093SHong Zhang 
51257625b84SAmlan Barua       if (fout) {
51357625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
514778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51526fbe8dcSKarl Rupp 
51657625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
51757625b84SAmlan Barua       }
5184f7415efSAmlan Barua #else
5190c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5200c9b18e4SAmlan Barua       if (fin) {
5210c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
522778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
52326fbe8dcSKarl Rupp 
5240c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5250c9b18e4SAmlan Barua       }
5260c9b18e4SAmlan Barua       if (fout) {
5270c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52926fbe8dcSKarl Rupp 
5300c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5310c9b18e4SAmlan Barua       }
5320c9b18e4SAmlan Barua       if (bout) {
5330c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
534778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
53526fbe8dcSKarl Rupp 
5360c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5370c9b18e4SAmlan Barua       }
5384f7415efSAmlan Barua #endif
5394f7415efSAmlan Barua       break;
5404f7415efSAmlan Barua     default:
5414f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5424f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
54326fbe8dcSKarl Rupp 
5444f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
54526fbe8dcSKarl Rupp 
5464f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5474f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
54826fbe8dcSKarl Rupp 
5494f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5504f7415efSAmlan Barua 
5514f7415efSAmlan Barua       if (fin) {
5524f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
553778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
55426fbe8dcSKarl Rupp 
5554f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5564f7415efSAmlan Barua       }
5574f7415efSAmlan Barua       if (bout) {
5584f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
559778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
56026fbe8dcSKarl Rupp 
5619496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5624f7415efSAmlan Barua       }
5633564f093SHong Zhang 
56457625b84SAmlan Barua       if (fout) {
56557625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
566778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
56726fbe8dcSKarl Rupp 
56857625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
56957625b84SAmlan Barua       }
5704f7415efSAmlan Barua #else
5710c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5720c9b18e4SAmlan Barua       if (fin) {
5730c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
574778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
57526fbe8dcSKarl Rupp 
5760c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5770c9b18e4SAmlan Barua       }
5780c9b18e4SAmlan Barua       if (fout) {
5790c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
580778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
58126fbe8dcSKarl Rupp 
5820c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5830c9b18e4SAmlan Barua       }
5840c9b18e4SAmlan Barua       if (bout) {
5850c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
58726fbe8dcSKarl Rupp 
5880c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5890c9b18e4SAmlan Barua       }
5904f7415efSAmlan Barua #endif
5914f7415efSAmlan Barua       break;
5924f7415efSAmlan Barua     }
5934f7415efSAmlan Barua   }
5944f7415efSAmlan Barua   PetscFunctionReturn(0);
5954f7415efSAmlan Barua }
5964f7415efSAmlan Barua EXTERN_C_END
5974f7415efSAmlan Barua 
598c92418ccSAmlan Barua #undef __FUNCT__
59974a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6003564f093SHong Zhang /*@
6013564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
60254efbe56SHong Zhang 
6033564f093SHong Zhang    Collective on Mat
6043564f093SHong Zhang 
6053564f093SHong Zhang    Input Parameters:
6063564f093SHong Zhang +  A - FFTW matrix
6073564f093SHong Zhang -  x - the PETSc vector
6083564f093SHong Zhang 
6093564f093SHong Zhang    Output Parameters:
6103564f093SHong Zhang .  y - the FFTW vector
6113564f093SHong Zhang 
612b2b77a04SHong Zhang   Options Database Keys:
6133564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
614b2b77a04SHong Zhang 
615b2b77a04SHong Zhang    Level: intermediate
6163564f093SHong Zhang 
61797504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
61897504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
61997504071SAmlan Barua          zeros. This routine does that job by scattering operation.
62097504071SAmlan Barua 
6213564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6223564f093SHong Zhang @*/
6233564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6243564f093SHong Zhang {
6253564f093SHong Zhang   PetscErrorCode ierr;
626b2b77a04SHong Zhang 
6273564f093SHong Zhang   PetscFunctionBegin;
6283564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6293564f093SHong Zhang   PetscFunctionReturn(0);
6303564f093SHong Zhang }
6313c3a9c75SAmlan Barua 
6323c3a9c75SAmlan Barua EXTERN_C_BEGIN
6333c3a9c75SAmlan Barua #undef __FUNCT__
6341986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
63574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6363c3a9c75SAmlan Barua {
6373c3a9c75SAmlan Barua   PetscErrorCode ierr;
638*ce94432eSBarry Smith   MPI_Comm       comm;
6393c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6403c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6419496c9aeSAmlan Barua   PetscInt       N     =fft->N;
642b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6439496c9aeSAmlan Barua   PetscInt       low;
6448ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6453c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6469496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6473c3a9c75SAmlan Barua   VecScatter     vecscat;
6483c3a9c75SAmlan Barua   IS             list1,list2;
6499496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6509496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6519496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6529496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6539496c9aeSAmlan Barua   ptrdiff_t      temp;
6549496c9aeSAmlan Barua #endif
6553c3a9c75SAmlan Barua 
6563564f093SHong Zhang   PetscFunctionBegin;
657*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
658f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
659f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6600298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
6613c3a9c75SAmlan Barua 
6623564f093SHong Zhang   if (size==1) {
6638ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6648ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6659496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6666971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6676971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6686971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6696971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
670b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
6713564f093SHong Zhang   } else {
6723c3a9c75SAmlan Barua     switch (ndim) {
6733c3a9c75SAmlan Barua     case 1:
67464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67564657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
67626fbe8dcSKarl Rupp 
6774f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
6784f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
67964657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
68064657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
68164657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
68264657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
68364657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
68464657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
68564657d84SAmlan Barua #else
686e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
68764657d84SAmlan Barua #endif
6883c3a9c75SAmlan Barua       break;
6893c3a9c75SAmlan Barua     case 2:
690bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
691bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
69226fbe8dcSKarl Rupp 
6934f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
6944f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
695bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
696bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
697bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
698bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
699bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
700bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
701bd59e6c6SAmlan Barua #else
7023c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
70326fbe8dcSKarl Rupp 
7043c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7059496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7069496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7073c3a9c75SAmlan Barua 
7083564f093SHong Zhang       if (dim[1]%2==0) {
7093c3a9c75SAmlan Barua         NM = dim[1]+2;
7103564f093SHong Zhang       } else {
7113c3a9c75SAmlan Barua         NM = dim[1]+1;
7123564f093SHong Zhang       }
7133c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7143c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7153c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7163c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
71726fbe8dcSKarl Rupp 
7185b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7193c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7203c3a9c75SAmlan Barua         }
7213c3a9c75SAmlan Barua       }
7223c3a9c75SAmlan Barua 
7233c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7243c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7253c3a9c75SAmlan Barua 
726f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
727f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
730b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
731b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
732b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
733b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
734bd59e6c6SAmlan Barua #endif
7359496c9aeSAmlan Barua       break;
7363c3a9c75SAmlan Barua 
7373c3a9c75SAmlan Barua     case 3:
738bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
739bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
74026fbe8dcSKarl Rupp 
7414f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7424f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
743bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
744bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
745bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
746bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
747bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
748bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
749bd59e6c6SAmlan Barua #else
75051d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
75126fbe8dcSKarl Rupp 
75251d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
75351d1eed7SAmlan Barua 
7549496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7559496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
75651d1eed7SAmlan Barua 
757db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
758db4deed7SKarl Rupp       else             NM = dim[2]+1;
75951d1eed7SAmlan Barua 
76051d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
76151d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
76251d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
76351d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
76451d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
76526fbe8dcSKarl Rupp 
76651d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
76751d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
76851d1eed7SAmlan Barua           }
76951d1eed7SAmlan Barua         }
77051d1eed7SAmlan Barua       }
77151d1eed7SAmlan Barua 
77251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
77351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
77451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
77551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
77751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
778b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
779b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
780b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
781b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
782bd59e6c6SAmlan Barua #endif
7839496c9aeSAmlan Barua       break;
7843c3a9c75SAmlan Barua 
7853c3a9c75SAmlan Barua     default:
786bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
787bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
78826fbe8dcSKarl Rupp 
7894f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
7904f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
791bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
792bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
793bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
794bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
795bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
796bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
797bd59e6c6SAmlan Barua #else
798e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
79926fbe8dcSKarl Rupp 
800e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
80126fbe8dcSKarl Rupp 
802e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
80326fbe8dcSKarl Rupp 
804e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
80526fbe8dcSKarl Rupp 
806e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
807e5d7f247SAmlan Barua 
808e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
809e5d7f247SAmlan Barua 
810e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
811e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
812e5d7f247SAmlan Barua 
813db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
814db4deed7SKarl Rupp       else                  NM = 1;
815e5d7f247SAmlan Barua 
8166971673cSAmlan Barua       j = low;
8173564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8186971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8196971673cSAmlan Barua         indx2[i] = j;
82026fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8216971673cSAmlan Barua         j++;
8226971673cSAmlan Barua       }
8236971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8246971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8256971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8266971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8276971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8286971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
829b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
830b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
831b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
832b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
833bd59e6c6SAmlan Barua #endif
8349496c9aeSAmlan Barua       break;
8353c3a9c75SAmlan Barua     }
836e81bb599SAmlan Barua   }
8373564f093SHong Zhang   PetscFunctionReturn(0);
8383c3a9c75SAmlan Barua }
8393c3a9c75SAmlan Barua EXTERN_C_END
8403c3a9c75SAmlan Barua 
8413c3a9c75SAmlan Barua #undef __FUNCT__
84274a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8433564f093SHong Zhang /*@
8443564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8453564f093SHong Zhang 
8463564f093SHong Zhang    Collective on Mat
8473564f093SHong Zhang 
8483564f093SHong Zhang     Input Parameters:
8493564f093SHong Zhang +   A - FFTW matrix
8503564f093SHong Zhang -   x - FFTW vector
8513564f093SHong Zhang 
8523564f093SHong Zhang    Output Parameters:
8533564f093SHong Zhang .  y - PETSc vector
8543564f093SHong Zhang 
8553564f093SHong Zhang    Level: intermediate
8563564f093SHong Zhang 
8573564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8583564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8593564f093SHong Zhang 
8603564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8613564f093SHong Zhang @*/
86274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8633c3a9c75SAmlan Barua {
8643c3a9c75SAmlan Barua   PetscErrorCode ierr;
8655fd66863SKarl Rupp 
8663c3a9c75SAmlan Barua   PetscFunctionBegin;
86774a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8683c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8693c3a9c75SAmlan Barua }
87054efbe56SHong Zhang 
8713c3a9c75SAmlan Barua EXTERN_C_BEGIN
8723c3a9c75SAmlan Barua #undef __FUNCT__
87374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
87474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8755b3e41ffSAmlan Barua {
8765b3e41ffSAmlan Barua   PetscErrorCode ierr;
877*ce94432eSBarry Smith   MPI_Comm       comm;
8785b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
8795b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8809496c9aeSAmlan Barua   PetscInt       N     = fft->N;
881b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
8829496c9aeSAmlan Barua   PetscInt       low;
8839496c9aeSAmlan Barua   PetscInt       rank,size;
8845b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8859496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8865b3e41ffSAmlan Barua   VecScatter     vecscat;
8875b3e41ffSAmlan Barua   IS             list1,list2;
8889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8899496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
8909496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
8919496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
8929496c9aeSAmlan Barua   ptrdiff_t temp;
8939496c9aeSAmlan Barua #endif
8949496c9aeSAmlan Barua 
8953564f093SHong Zhang   PetscFunctionBegin;
896*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
8975b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8985b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8990298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9005b3e41ffSAmlan Barua 
901e81bb599SAmlan Barua   if (size==1) {
902b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9036971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9046971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9056971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9066971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
907b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
908e81bb599SAmlan Barua 
9093564f093SHong Zhang   } else {
910e81bb599SAmlan Barua     switch (ndim) {
911e81bb599SAmlan Barua     case 1:
91264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
91364657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
91426fbe8dcSKarl Rupp 
9154f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9164f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
91764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
91864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
91964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
92064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
92164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
92264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
92364657d84SAmlan Barua #else
9246a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
92564657d84SAmlan Barua #endif
9265b3e41ffSAmlan Barua       break;
9275b3e41ffSAmlan Barua     case 2:
928bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
929bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
93026fbe8dcSKarl Rupp 
9314f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9324f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
933bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
934bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
937bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
938bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
939bd59e6c6SAmlan Barua #else
9405b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
94126fbe8dcSKarl Rupp 
9425b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9435b3e41ffSAmlan Barua 
9449496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9459496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9465b3e41ffSAmlan Barua 
947db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
948db4deed7SKarl Rupp       else             NM = dim[1]+1;
9495b3e41ffSAmlan Barua 
9505b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9515b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9525b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9535b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
95426fbe8dcSKarl Rupp 
9555b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9565b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9575b3e41ffSAmlan Barua         }
9585b3e41ffSAmlan Barua       }
9595b3e41ffSAmlan Barua 
9605b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9615b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9625b3e41ffSAmlan Barua 
9635b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9645b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9655b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9665b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
967b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
968b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
969b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
970b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
971bd59e6c6SAmlan Barua #endif
9729496c9aeSAmlan Barua       break;
9735b3e41ffSAmlan Barua     case 3:
974bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
975bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
97626fbe8dcSKarl Rupp 
9774f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
9784f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
979bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
980bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
982bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
983bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
984bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
985bd59e6c6SAmlan Barua #else
986bd59e6c6SAmlan Barua 
98751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
98826fbe8dcSKarl Rupp 
98951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
99051d1eed7SAmlan Barua 
9919496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9929496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
99351d1eed7SAmlan Barua 
994db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
995db4deed7SKarl Rupp       else             NM = dim[2]+1;
99651d1eed7SAmlan Barua 
99751d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
99851d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
99951d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
100051d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
100151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
100226fbe8dcSKarl Rupp 
100351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
100451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
100551d1eed7SAmlan Barua           }
100651d1eed7SAmlan Barua         }
100751d1eed7SAmlan Barua       }
100851d1eed7SAmlan Barua 
100951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
101051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
101151d1eed7SAmlan Barua 
101251d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
101351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1016b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1017b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1018b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1019b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1020bd59e6c6SAmlan Barua #endif
10219496c9aeSAmlan Barua       break;
10225b3e41ffSAmlan Barua     default:
1023bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1024bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
102526fbe8dcSKarl Rupp 
10264f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10274f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1028bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1032bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1033bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1034bd59e6c6SAmlan Barua #else
1035ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
103626fbe8dcSKarl Rupp 
1037ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
103826fbe8dcSKarl Rupp 
1039ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
104026fbe8dcSKarl Rupp 
1041ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
104226fbe8dcSKarl Rupp 
1043ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1044ba6e06d0SAmlan Barua 
1045ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1046ba6e06d0SAmlan Barua 
1047ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1048ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1049ba6e06d0SAmlan Barua 
1050db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1051db4deed7SKarl Rupp       else                  NM = 1;
1052ba6e06d0SAmlan Barua 
1053ba6e06d0SAmlan Barua       j = low;
10543564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1055ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1056ba6e06d0SAmlan Barua         indx2[i] = j;
10573564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1058ba6e06d0SAmlan Barua         j++;
1059ba6e06d0SAmlan Barua       }
1060ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1061ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1062ba6e06d0SAmlan Barua 
1063ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1064ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1065ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1067b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1068b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1069b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1070b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1071bd59e6c6SAmlan Barua #endif
10729496c9aeSAmlan Barua       break;
10735b3e41ffSAmlan Barua     }
1074e81bb599SAmlan Barua   }
10753564f093SHong Zhang   PetscFunctionReturn(0);
10765b3e41ffSAmlan Barua }
10775b3e41ffSAmlan Barua EXTERN_C_END
10785b3e41ffSAmlan Barua 
10795b3e41ffSAmlan Barua EXTERN_C_BEGIN
10805b3e41ffSAmlan Barua #undef __FUNCT__
10813c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10823c3a9c75SAmlan Barua /*
10833564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
10843564f093SHong Zhang 
10853c3a9c75SAmlan Barua   Options Database Keys:
10863c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10873c3a9c75SAmlan Barua 
10883c3a9c75SAmlan Barua    Level: intermediate
10893c3a9c75SAmlan Barua 
10903c3a9c75SAmlan Barua */
1091b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1092b2b77a04SHong Zhang {
1093b2b77a04SHong Zhang   PetscErrorCode ierr;
1094*ce94432eSBarry Smith   MPI_Comm       comm;
1095b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1096b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1097b2b77a04SHong Zhang   PetscInt       n         =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1098b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1099b2b77a04SHong Zhang   PetscBool      flg;
11004f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
110111902ff2SHong Zhang   PetscMPIInt    size,rank;
11029496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11039496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11049496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11059496c9aeSAmlan Barua   ptrdiff_t temp;
11068ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11074f48bc67SAmlan Barua #else
11084f48bc67SAmlan Barua   PetscInt n1;
11099496c9aeSAmlan Barua #endif
11109496c9aeSAmlan Barua 
1111b2b77a04SHong Zhang   PetscFunctionBegin;
1112*ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1113b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
111411902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1115c92418ccSAmlan Barua 
11161878d478SAmlan Barua   fftw_mpi_init();
111711902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
111811902ff2SHong Zhang   pdim[0] = dim[0];
11198ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11208ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11218ca4c034SAmlan Barua #endif
11223564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11236882372aSHong Zhang     partial_dim *= dim[ctr];
112411902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11258ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1126db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1127db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11288ca4c034SAmlan Barua #endif
11296882372aSHong Zhang   }
11303c3a9c75SAmlan Barua 
1131b2b77a04SHong Zhang   if (size == 1) {
1132e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1133b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1134b2b77a04SHong Zhang     n    = N;
1135e81bb599SAmlan Barua #else
1136e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1137e81bb599SAmlan Barua     n    = tot_dim;
1138e81bb599SAmlan Barua #endif
1139e81bb599SAmlan Barua 
1140b2b77a04SHong Zhang   } else {
11411abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1142b2b77a04SHong Zhang     switch (ndim) {
1143b2b77a04SHong Zhang     case 1:
11443c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11453c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1146e5d7f247SAmlan Barua #else
11479496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
114826fbe8dcSKarl Rupp 
11496882372aSHong Zhang       n    = (PetscInt)local_n0;
11509496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11519496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1152e5d7f247SAmlan Barua #endif
1153b2b77a04SHong Zhang       break;
1154b2b77a04SHong Zhang     case 2:
11555b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1156b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1157b2b77a04SHong Zhang       /*
1158b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1159b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1160b2b77a04SHong Zhang        */
1161b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1162b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11635b3e41ffSAmlan Barua #else
11644f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
116526fbe8dcSKarl Rupp 
11665b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1167c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11685b3e41ffSAmlan Barua #endif
1169b2b77a04SHong Zhang       break;
1170b2b77a04SHong Zhang     case 3:
117151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
117251d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
117326fbe8dcSKarl Rupp 
11746882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11756882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
117651d1eed7SAmlan Barua #else
11774f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
117826fbe8dcSKarl Rupp 
117951d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1180c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
118151d1eed7SAmlan Barua #endif
1182b2b77a04SHong Zhang       break;
1183b2b77a04SHong Zhang     default:
1184b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
118511902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
118626fbe8dcSKarl Rupp 
11876882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
11886882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1189b3a17365SAmlan Barua #else
1190b3a17365SAmlan Barua       temp = pdim[ndim-1];
119126fbe8dcSKarl Rupp 
1192b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
119326fbe8dcSKarl Rupp 
11944f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
119526fbe8dcSKarl Rupp 
1196b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1197b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
119826fbe8dcSKarl Rupp 
1199b3a17365SAmlan Barua       pdim[ndim-1] = temp;
120026fbe8dcSKarl Rupp 
1201c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1202b3a17365SAmlan Barua #endif
1203b2b77a04SHong Zhang       break;
1204b2b77a04SHong Zhang     }
1205b2b77a04SHong Zhang   }
1206b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1207b2b77a04SHong Zhang   ierr      = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1208b2b77a04SHong Zhang   fft->data = (void*)fftw;
1209b2b77a04SHong Zhang 
1210b2b77a04SHong Zhang   fft->n            = n;
12110c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1212e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
121326fbe8dcSKarl Rupp 
1214c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr);
121526fbe8dcSKarl Rupp 
1216b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1217c92418ccSAmlan Barua 
1218b2b77a04SHong Zhang   fftw->p_forward  = 0;
1219b2b77a04SHong Zhang   fftw->p_backward = 0;
1220b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1221b2b77a04SHong Zhang 
1222b2b77a04SHong Zhang   if (size == 1) {
1223b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1224b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1225b2b77a04SHong Zhang   } else {
1226b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1227b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1228b2b77a04SHong Zhang   }
1229b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1230b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12314a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
123226fbe8dcSKarl Rupp 
12334a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
12344a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
12354a52bad8SHong Zhang   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1236b2b77a04SHong Zhang 
1237b2b77a04SHong Zhang   /* get runtime options */
1238*ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1239b2b77a04SHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
124026fbe8dcSKarl Rupp   if (flg) fftw->p_flag = (unsigned)p_flag;
12414a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1242b2b77a04SHong Zhang   PetscFunctionReturn(0);
1243b2b77a04SHong Zhang }
1244b2b77a04SHong Zhang EXTERN_C_END
12453c3a9c75SAmlan Barua 
12463c3a9c75SAmlan Barua 
12473c3a9c75SAmlan Barua 
12483c3a9c75SAmlan Barua 
1249