xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 7a21806cec79a1efbed21fbe5568b2c145b0efd9)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua    Input parameter:
303564f093SHong Zhang      A - the matrix
3197504071SAmlan Barua      x - the vector on which FDFT will be performed
3297504071SAmlan Barua 
3397504071SAmlan Barua    Output parameter:
3497504071SAmlan Barua      y - vector that stores result of FDFT
3597504071SAmlan Barua */
36b2b77a04SHong Zhang #undef __FUNCT__
37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39b2b77a04SHong Zhang {
40b2b77a04SHong Zhang   PetscErrorCode ierr;
41b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
44b2b77a04SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
45*7a21806cSHong Zhang   int            *dim_32,i;
46b2b77a04SHong Zhang 
47b2b77a04SHong Zhang   PetscFunctionBegin;
48*7a21806cSHong Zhang   ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
49*7a21806cSHong Zhang   for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
50*7a21806cSHong Zhang 
51b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
52b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
53b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
54b2b77a04SHong Zhang     switch (ndim) {
55b2b77a04SHong Zhang     case 1:
5658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
57b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5858a912c5SAmlan Barua #else
5958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6058a912c5SAmlan Barua #endif
61b2b77a04SHong Zhang       break;
62b2b77a04SHong Zhang     case 2:
6358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
64b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6558a912c5SAmlan Barua #else
6658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6758a912c5SAmlan Barua #endif
68b2b77a04SHong Zhang       break;
69b2b77a04SHong Zhang     case 3:
7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
71b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
72*7a21806cSHong Zhang       //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE);
7358a912c5SAmlan Barua #else
7458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7558a912c5SAmlan Barua #endif
76b2b77a04SHong Zhang       break;
77b2b77a04SHong Zhang     default:
7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
79*7a21806cSHong Zhang       fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
805274ed1bSHong Zhang       /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */
8158a912c5SAmlan Barua #else
8258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8358a912c5SAmlan Barua #endif
84b2b77a04SHong Zhang       break;
85b2b77a04SHong Zhang     }
86b2b77a04SHong Zhang     fftw->finarray  = x_array;
87b2b77a04SHong Zhang     fftw->foutarray = y_array;
88b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
89b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
90b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
91b2b77a04SHong Zhang   } else { /* use existing plan */
92b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
93b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
94b2b77a04SHong Zhang     } else {
95b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
96b2b77a04SHong Zhang     }
97b2b77a04SHong Zhang   }
98b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
99b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
100*7a21806cSHong Zhang   ierr = PetscFree(dim_32);CHKERRQ(ierr);
101b2b77a04SHong Zhang   PetscFunctionReturn(0);
102b2b77a04SHong Zhang }
103b2b77a04SHong Zhang 
10497504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
10597504071SAmlan Barua    Input parameter:
1063564f093SHong Zhang      A - the matrix
10797504071SAmlan Barua      x - the vector on which BDFT will be performed
10897504071SAmlan Barua 
10997504071SAmlan Barua    Output parameter:
11097504071SAmlan Barua      y - vector that stores result of BDFT
11197504071SAmlan Barua */
11297504071SAmlan Barua 
113b2b77a04SHong Zhang #undef __FUNCT__
114b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
115b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
116b2b77a04SHong Zhang {
117b2b77a04SHong Zhang   PetscErrorCode ierr;
118b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
119b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
120b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
121b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
122*7a21806cSHong Zhang   int            *dim_32,i; /* for 64-bit */
123b2b77a04SHong Zhang 
124b2b77a04SHong Zhang   PetscFunctionBegin;
125*7a21806cSHong Zhang   ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
126*7a21806cSHong Zhang   for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
127*7a21806cSHong Zhang 
128b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
129b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
130b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
131b2b77a04SHong Zhang     switch (ndim) {
132b2b77a04SHong Zhang     case 1:
13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
134b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13558a912c5SAmlan Barua #else
136e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
13758a912c5SAmlan Barua #endif
138b2b77a04SHong Zhang       break;
139b2b77a04SHong Zhang     case 2:
14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
141b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14258a912c5SAmlan Barua #else
143e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
14458a912c5SAmlan Barua #endif
145b2b77a04SHong Zhang       break;
146b2b77a04SHong Zhang     case 3:
14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
149*7a21806cSHong Zhang       //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE);
15058a912c5SAmlan Barua #else
151e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
15258a912c5SAmlan Barua #endif
153b2b77a04SHong Zhang       break;
154b2b77a04SHong Zhang     default:
15558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
156*7a21806cSHong Zhang       fftw->p_backward = fftw_plan_dft((int)ndim,dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
15758a912c5SAmlan Barua #else
158e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
15958a912c5SAmlan Barua #endif
160b2b77a04SHong Zhang       break;
161b2b77a04SHong Zhang     }
162b2b77a04SHong Zhang     fftw->binarray  = x_array;
163b2b77a04SHong Zhang     fftw->boutarray = y_array;
164b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
165b2b77a04SHong Zhang   } else { /* use existing plan */
166b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
167b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
168b2b77a04SHong Zhang     } else {
169b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
170b2b77a04SHong Zhang     }
171b2b77a04SHong Zhang   }
172b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
173b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
174*7a21806cSHong Zhang   ierr = PetscFree(dim_32);CHKERRQ(ierr);
175b2b77a04SHong Zhang   PetscFunctionReturn(0);
176b2b77a04SHong Zhang }
177b2b77a04SHong Zhang 
17897504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
17997504071SAmlan Barua    Input parameter:
1803564f093SHong Zhang      A - the matrix
18197504071SAmlan Barua      x - the vector on which FDFT will be performed
18297504071SAmlan Barua 
18397504071SAmlan Barua    Output parameter:
18497504071SAmlan Barua    y   - vector that stores result of FDFT
18597504071SAmlan Barua */
186b2b77a04SHong Zhang #undef __FUNCT__
187b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
188b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
189b2b77a04SHong Zhang {
190b2b77a04SHong Zhang   PetscErrorCode ierr;
191b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
192b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
193b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
194c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
195ce94432eSBarry Smith   MPI_Comm       comm;
196b2b77a04SHong Zhang 
197b2b77a04SHong Zhang   PetscFunctionBegin;
198ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
199b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
200b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
201b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
202b2b77a04SHong Zhang     switch (ndim) {
203b2b77a04SHong Zhang     case 1:
2043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
206ae0a50aaSHong Zhang #else
2074f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     case 2:
21197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
212b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
2143c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     case 3:
2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
219b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2203c3a9c75SAmlan Barua #else
22151d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2223c3a9c75SAmlan Barua #endif
223b2b77a04SHong Zhang       break;
224b2b77a04SHong Zhang     default:
2253c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
226c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2273c3a9c75SAmlan Barua #else
2283c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2293c3a9c75SAmlan Barua #endif
230b2b77a04SHong Zhang       break;
231b2b77a04SHong Zhang     }
232b2b77a04SHong Zhang     fftw->finarray  = x_array;
233b2b77a04SHong Zhang     fftw->foutarray = y_array;
234b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
235b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
236b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
237b2b77a04SHong Zhang   } else { /* use existing plan */
238b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
239b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
240b2b77a04SHong Zhang     } else {
241b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
242b2b77a04SHong Zhang     }
243b2b77a04SHong Zhang   }
244b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
245b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
246b2b77a04SHong Zhang   PetscFunctionReturn(0);
247b2b77a04SHong Zhang }
248b2b77a04SHong Zhang 
24997504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
25097504071SAmlan Barua    Input parameter:
2513564f093SHong Zhang      A - the matrix
25297504071SAmlan Barua      x - the vector on which BDFT will be performed
25397504071SAmlan Barua 
25497504071SAmlan Barua    Output parameter:
25597504071SAmlan Barua      y - vector that stores result of BDFT
25697504071SAmlan Barua */
257b2b77a04SHong Zhang #undef __FUNCT__
258b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
259b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
260b2b77a04SHong Zhang {
261b2b77a04SHong Zhang   PetscErrorCode ierr;
262b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
263b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
264b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
265c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
266ce94432eSBarry Smith   MPI_Comm       comm;
267b2b77a04SHong Zhang 
268b2b77a04SHong Zhang   PetscFunctionBegin;
269ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
270b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
271b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
272b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
273b2b77a04SHong Zhang     switch (ndim) {
274b2b77a04SHong Zhang     case 1:
2753c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
276b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
277ae0a50aaSHong Zhang #else
2784f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2793c3a9c75SAmlan Barua #endif
280b2b77a04SHong Zhang       break;
281b2b77a04SHong Zhang     case 2:
28297504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
283b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2843c3a9c75SAmlan Barua #else
2853c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2863c3a9c75SAmlan Barua #endif
287b2b77a04SHong Zhang       break;
288b2b77a04SHong Zhang     case 3:
2893c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
290b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2913c3a9c75SAmlan Barua #else
2923c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
2933c3a9c75SAmlan Barua #endif
294b2b77a04SHong Zhang       break;
295b2b77a04SHong Zhang     default:
2963c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
297c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2983c3a9c75SAmlan Barua #else
2993c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3003c3a9c75SAmlan Barua #endif
301b2b77a04SHong Zhang       break;
302b2b77a04SHong Zhang     }
303b2b77a04SHong Zhang     fftw->binarray  = x_array;
304b2b77a04SHong Zhang     fftw->boutarray = y_array;
305b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
306b2b77a04SHong Zhang   } else { /* use existing plan */
307b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
308b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
309b2b77a04SHong Zhang     } else {
310b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
311b2b77a04SHong Zhang     }
312b2b77a04SHong Zhang   }
313b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
314b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
315b2b77a04SHong Zhang   PetscFunctionReturn(0);
316b2b77a04SHong Zhang }
317b2b77a04SHong Zhang 
318b2b77a04SHong Zhang #undef __FUNCT__
319b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
320b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
321b2b77a04SHong Zhang {
322b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
323b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
324b2b77a04SHong Zhang   PetscErrorCode ierr;
325b2b77a04SHong Zhang 
326b2b77a04SHong Zhang   PetscFunctionBegin;
327b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
328b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
329b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
330bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3316ccf0b3eSHong Zhang   fftw_mpi_cleanup();
332b2b77a04SHong Zhang   PetscFunctionReturn(0);
333b2b77a04SHong Zhang }
334b2b77a04SHong Zhang 
335c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
336b2b77a04SHong Zhang #undef __FUNCT__
337b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
338b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
339b2b77a04SHong Zhang {
340b2b77a04SHong Zhang   PetscErrorCode ierr;
341b2b77a04SHong Zhang   PetscScalar    *array;
342b2b77a04SHong Zhang 
343b2b77a04SHong Zhang   PetscFunctionBegin;
344b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
345b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
346b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
347b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
348b2b77a04SHong Zhang   PetscFunctionReturn(0);
349b2b77a04SHong Zhang }
350b2b77a04SHong Zhang 
3514f7415efSAmlan Barua #undef __FUNCT__
3524be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3534be45526SHong Zhang /*@
354b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3554f7415efSAmlan Barua      parallel layout determined by FFTW
3564f7415efSAmlan Barua 
3574f7415efSAmlan Barua    Collective on Mat
3584f7415efSAmlan Barua 
3594f7415efSAmlan Barua    Input Parameter:
3603564f093SHong Zhang .   A - the matrix
3614f7415efSAmlan Barua 
3624f7415efSAmlan Barua    Output Parameter:
363cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
364cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
365cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
3664f7415efSAmlan Barua 
3674f7415efSAmlan Barua   Level: advanced
3683564f093SHong Zhang 
36997504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
37097504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
37197504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
37297504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
37397504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
37497504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
375e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
376e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
377e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
378e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
379e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
380e0ec536eSAmlan Barua         each processor and returns that.
3814f7415efSAmlan Barua 
3824f7415efSAmlan Barua .seealso: MatCreateFFTW()
3834be45526SHong Zhang @*/
3844be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3854be45526SHong Zhang {
3864be45526SHong Zhang   PetscErrorCode ierr;
387b4c3921fSHong Zhang 
3884be45526SHong Zhang   PetscFunctionBegin;
3894be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3904be45526SHong Zhang   PetscFunctionReturn(0);
3914be45526SHong Zhang }
3924be45526SHong Zhang 
3934be45526SHong Zhang #undef __FUNCT__
3944be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3954be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3964f7415efSAmlan Barua {
3974f7415efSAmlan Barua   PetscErrorCode ierr;
3984f7415efSAmlan Barua   PetscMPIInt    size,rank;
399ce94432eSBarry Smith   MPI_Comm       comm;
4004f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4014f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4029496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4034f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4044f7415efSAmlan Barua 
4054f7415efSAmlan Barua   PetscFunctionBegin;
4064f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4074f7415efSAmlan Barua   PetscValidType(A,1);
408ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4094f7415efSAmlan Barua 
4104f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4114f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4124f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4134f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4144f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4154f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4164f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4174f7415efSAmlan Barua #else
4188ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4198ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4208ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4214f7415efSAmlan Barua #endif
42297504071SAmlan Barua   } else { /* parallel cases */
4234f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4249496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4259496c9aeSAmlan Barua     fftw_complex *data_fout;
4269496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4279496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4289496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4299496c9aeSAmlan Barua #else
4304f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4316ccf0b3eSHong Zhang     PetscInt  n1,N1;
4329496c9aeSAmlan Barua     ptrdiff_t temp;
4339496c9aeSAmlan Barua #endif
4349496c9aeSAmlan Barua 
4354f7415efSAmlan Barua     switch (ndim) {
4364f7415efSAmlan Barua     case 1:
43757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4384f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
43957625b84SAmlan Barua #else
44057625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44157625b84SAmlan Barua       if (fin) {
44257625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
443778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
44426fbe8dcSKarl Rupp 
44557625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
44657625b84SAmlan Barua       }
44757625b84SAmlan Barua       if (fout) {
44857625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
449778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
45026fbe8dcSKarl Rupp 
45157625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
45257625b84SAmlan Barua       }
45357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
45457625b84SAmlan Barua       if (bout) {
45557625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
456778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
45726fbe8dcSKarl Rupp 
45857625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
45957625b84SAmlan Barua       }
46057625b84SAmlan Barua       break;
46157625b84SAmlan Barua #endif
4624f7415efSAmlan Barua     case 2:
46397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4644f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4654f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4664f7415efSAmlan Barua       if (fin) {
4674f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
468778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
46926fbe8dcSKarl Rupp 
4704f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4714f7415efSAmlan Barua       }
4724f7415efSAmlan Barua       if (bout) {
4734f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
474778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
47526fbe8dcSKarl Rupp 
4764f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
4774f7415efSAmlan Barua       }
47857625b84SAmlan Barua       if (fout) {
47957625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
480778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48126fbe8dcSKarl Rupp 
48257625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
48357625b84SAmlan Barua       }
4844f7415efSAmlan Barua #else
4854f7415efSAmlan Barua       /* Get local size */
4864f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4874f7415efSAmlan Barua       if (fin) {
4884f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
489778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
49026fbe8dcSKarl Rupp 
4914f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
4924f7415efSAmlan Barua       }
4934f7415efSAmlan Barua       if (fout) {
4944f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
495778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
49626fbe8dcSKarl Rupp 
4974f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
4984f7415efSAmlan Barua       }
4994f7415efSAmlan Barua       if (bout) {
5004f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
501778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
50226fbe8dcSKarl Rupp 
5034f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5044f7415efSAmlan Barua       }
5054f7415efSAmlan Barua #endif
5064f7415efSAmlan Barua       break;
5074f7415efSAmlan Barua     case 3:
5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5094f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5104f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5114f7415efSAmlan Barua       if (fin) {
5124f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
513778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
51426fbe8dcSKarl Rupp 
5154f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5164f7415efSAmlan Barua       }
5174f7415efSAmlan Barua       if (bout) {
5184f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
519778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
52026fbe8dcSKarl Rupp 
5214f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5224f7415efSAmlan Barua       }
5233564f093SHong Zhang 
52457625b84SAmlan Barua       if (fout) {
52557625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52726fbe8dcSKarl Rupp 
52857625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52957625b84SAmlan Barua       }
5304f7415efSAmlan Barua #else
5310c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5320c9b18e4SAmlan Barua       if (fin) {
5330c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
534778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
53526fbe8dcSKarl Rupp 
5360c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5370c9b18e4SAmlan Barua       }
5380c9b18e4SAmlan Barua       if (fout) {
5390c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
540778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
54126fbe8dcSKarl Rupp 
5420c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5430c9b18e4SAmlan Barua       }
5440c9b18e4SAmlan Barua       if (bout) {
5450c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
546778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
54726fbe8dcSKarl Rupp 
5480c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5490c9b18e4SAmlan Barua       }
5504f7415efSAmlan Barua #endif
5514f7415efSAmlan Barua       break;
5524f7415efSAmlan Barua     default:
5534f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5544f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
55526fbe8dcSKarl Rupp 
5564f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
55726fbe8dcSKarl Rupp 
5584f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5594f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
56026fbe8dcSKarl Rupp 
5614f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5624f7415efSAmlan Barua 
5634f7415efSAmlan Barua       if (fin) {
5644f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
565778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
56626fbe8dcSKarl Rupp 
5674f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5684f7415efSAmlan Barua       }
5694f7415efSAmlan Barua       if (bout) {
5704f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
571778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
57226fbe8dcSKarl Rupp 
5739496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5744f7415efSAmlan Barua       }
5753564f093SHong Zhang 
57657625b84SAmlan Barua       if (fout) {
57757625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
578778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57926fbe8dcSKarl Rupp 
58057625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
58157625b84SAmlan Barua       }
5824f7415efSAmlan Barua #else
5830c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5840c9b18e4SAmlan Barua       if (fin) {
5850c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
58726fbe8dcSKarl Rupp 
5880c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5890c9b18e4SAmlan Barua       }
5900c9b18e4SAmlan Barua       if (fout) {
5910c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
592778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
59326fbe8dcSKarl Rupp 
5940c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5950c9b18e4SAmlan Barua       }
5960c9b18e4SAmlan Barua       if (bout) {
5970c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
598778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
59926fbe8dcSKarl Rupp 
6000c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6010c9b18e4SAmlan Barua       }
6024f7415efSAmlan Barua #endif
6034f7415efSAmlan Barua       break;
6044f7415efSAmlan Barua     }
6054f7415efSAmlan Barua   }
6064f7415efSAmlan Barua   PetscFunctionReturn(0);
6074f7415efSAmlan Barua }
6084f7415efSAmlan Barua 
609c92418ccSAmlan Barua #undef __FUNCT__
61074a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
6113564f093SHong Zhang /*@
6123564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
61354efbe56SHong Zhang 
6143564f093SHong Zhang    Collective on Mat
6153564f093SHong Zhang 
6163564f093SHong Zhang    Input Parameters:
6173564f093SHong Zhang +  A - FFTW matrix
6183564f093SHong Zhang -  x - the PETSc vector
6193564f093SHong Zhang 
6203564f093SHong Zhang    Output Parameters:
6213564f093SHong Zhang .  y - the FFTW vector
6223564f093SHong Zhang 
623b2b77a04SHong Zhang   Options Database Keys:
6243564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
625b2b77a04SHong Zhang 
626b2b77a04SHong Zhang    Level: intermediate
6273564f093SHong Zhang 
62897504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
62997504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
63097504071SAmlan Barua          zeros. This routine does that job by scattering operation.
63197504071SAmlan Barua 
6323564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6333564f093SHong Zhang @*/
6343564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6353564f093SHong Zhang {
6363564f093SHong Zhang   PetscErrorCode ierr;
637b2b77a04SHong Zhang 
6383564f093SHong Zhang   PetscFunctionBegin;
6393564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6403564f093SHong Zhang   PetscFunctionReturn(0);
6413564f093SHong Zhang }
6423c3a9c75SAmlan Barua 
6433c3a9c75SAmlan Barua #undef __FUNCT__
6441986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
64574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6463c3a9c75SAmlan Barua {
6473c3a9c75SAmlan Barua   PetscErrorCode ierr;
648ce94432eSBarry Smith   MPI_Comm       comm;
6493c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6503c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6519496c9aeSAmlan Barua   PetscInt       N     =fft->N;
652b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6539496c9aeSAmlan Barua   PetscInt       low;
654*7a21806cSHong Zhang   PetscMPIInt    rank,size;
655*7a21806cSHong Zhang   PetscInt       vsize,vsize1;
656*7a21806cSHong Zhang   //ptrdiff_t      alloc_local,local_n0,local_0_start;
657*7a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6589496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6593c3a9c75SAmlan Barua   VecScatter     vecscat;
6603c3a9c75SAmlan Barua   IS             list1,list2;
6619496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6629496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6639496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6649496c9aeSAmlan Barua   PetscInt       N1, n1,NM;
6659496c9aeSAmlan Barua   ptrdiff_t      temp;
6669496c9aeSAmlan Barua #endif
6673c3a9c75SAmlan Barua 
6683564f093SHong Zhang   PetscFunctionBegin;
669ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
670f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
671f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6720298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
6733c3a9c75SAmlan Barua 
6743564f093SHong Zhang   if (size==1) {
6758ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6768ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6779496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6786971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6796971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6806971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6816971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
682b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
6833564f093SHong Zhang   } else {
6843c3a9c75SAmlan Barua     switch (ndim) {
6853c3a9c75SAmlan Barua     case 1:
68664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
687*7a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
68826fbe8dcSKarl Rupp 
6894f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
6904f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
69164657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
69264657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69364657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
69464657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
69564657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
69664657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
69764657d84SAmlan Barua #else
698e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
69964657d84SAmlan Barua #endif
7003c3a9c75SAmlan Barua       break;
7013c3a9c75SAmlan Barua     case 2:
702bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
703*7a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
70426fbe8dcSKarl Rupp 
7054f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7064f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
707bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
708bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
709bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
710bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
711bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
712bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
713bd59e6c6SAmlan Barua #else
7143c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
71526fbe8dcSKarl Rupp 
7163c3a9c75SAmlan Barua       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7179496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
7189496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7193c3a9c75SAmlan Barua 
7203564f093SHong Zhang       if (dim[1]%2==0) {
7213c3a9c75SAmlan Barua         NM = dim[1]+2;
7223564f093SHong Zhang       } else {
7233c3a9c75SAmlan Barua         NM = dim[1]+1;
7243564f093SHong Zhang       }
7253c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7263c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7273c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7283c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
72926fbe8dcSKarl Rupp 
7305b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7313c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7323c3a9c75SAmlan Barua         }
7333c3a9c75SAmlan Barua       }
7343c3a9c75SAmlan Barua 
7353c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7363c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7373c3a9c75SAmlan Barua 
738f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
739f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
741f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
742b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
743b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
744b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
745b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
746bd59e6c6SAmlan Barua #endif
7479496c9aeSAmlan Barua       break;
7483c3a9c75SAmlan Barua 
7493c3a9c75SAmlan Barua     case 3:
750bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
751*7a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
75226fbe8dcSKarl Rupp 
7534f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7544f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
755bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
756bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
757bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
758bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
759bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
760bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
761bd59e6c6SAmlan Barua #else
762*7a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
76326fbe8dcSKarl Rupp 
76451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
76551d1eed7SAmlan Barua 
7669496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7679496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
76851d1eed7SAmlan Barua 
769db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
770db4deed7SKarl Rupp       else             NM = dim[2]+1;
77151d1eed7SAmlan Barua 
77251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
77351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
77451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
77551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
77651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
77726fbe8dcSKarl Rupp 
77851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
77951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
78051d1eed7SAmlan Barua           }
78151d1eed7SAmlan Barua         }
78251d1eed7SAmlan Barua       }
78351d1eed7SAmlan Barua 
78451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
78551d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
78651d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
78751d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
78851d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
78951d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
790b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
791b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
792b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
793b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
794bd59e6c6SAmlan Barua #endif
7959496c9aeSAmlan Barua       break;
7963c3a9c75SAmlan Barua 
7973c3a9c75SAmlan Barua     default:
798bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
799*7a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
80026fbe8dcSKarl Rupp 
8014f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8024f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
803bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
804bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
807bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
808bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
809bd59e6c6SAmlan Barua #else
810e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
81126fbe8dcSKarl Rupp 
812e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
81326fbe8dcSKarl Rupp 
814*7a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
81526fbe8dcSKarl Rupp 
816e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
81726fbe8dcSKarl Rupp 
818e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
819e5d7f247SAmlan Barua 
820e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
821e5d7f247SAmlan Barua 
822e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
823e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
824e5d7f247SAmlan Barua 
825db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
826db4deed7SKarl Rupp       else                  NM = 1;
827e5d7f247SAmlan Barua 
8286971673cSAmlan Barua       j = low;
8293564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8306971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8316971673cSAmlan Barua         indx2[i] = j;
83226fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8336971673cSAmlan Barua         j++;
8346971673cSAmlan Barua       }
8356971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8366971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8376971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8386971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8396971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8406971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
841b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
842b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
843b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
844b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
845bd59e6c6SAmlan Barua #endif
8469496c9aeSAmlan Barua       break;
8473c3a9c75SAmlan Barua     }
848e81bb599SAmlan Barua   }
8493564f093SHong Zhang   PetscFunctionReturn(0);
8503c3a9c75SAmlan Barua }
8513c3a9c75SAmlan Barua 
8523c3a9c75SAmlan Barua #undef __FUNCT__
85374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
8543564f093SHong Zhang /*@
8553564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8563564f093SHong Zhang 
8573564f093SHong Zhang    Collective on Mat
8583564f093SHong Zhang 
8593564f093SHong Zhang     Input Parameters:
8603564f093SHong Zhang +   A - FFTW matrix
8613564f093SHong Zhang -   x - FFTW vector
8623564f093SHong Zhang 
8633564f093SHong Zhang    Output Parameters:
8643564f093SHong Zhang .  y - PETSc vector
8653564f093SHong Zhang 
8663564f093SHong Zhang    Level: intermediate
8673564f093SHong Zhang 
8683564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8693564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8703564f093SHong Zhang 
8713564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8723564f093SHong Zhang @*/
87374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8743c3a9c75SAmlan Barua {
8753c3a9c75SAmlan Barua   PetscErrorCode ierr;
8765fd66863SKarl Rupp 
8773c3a9c75SAmlan Barua   PetscFunctionBegin;
87874a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8793c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8803c3a9c75SAmlan Barua }
88154efbe56SHong Zhang 
8823c3a9c75SAmlan Barua #undef __FUNCT__
88374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
88474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8855b3e41ffSAmlan Barua {
8865b3e41ffSAmlan Barua   PetscErrorCode ierr;
887ce94432eSBarry Smith   MPI_Comm       comm;
8885b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
8895b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8909496c9aeSAmlan Barua   PetscInt       N     = fft->N;
891b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
8929496c9aeSAmlan Barua   PetscInt       low;
893*7a21806cSHong Zhang   PetscMPIInt    rank,size;
894*7a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
8959496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8965b3e41ffSAmlan Barua   VecScatter     vecscat;
8975b3e41ffSAmlan Barua   IS             list1,list2;
8989496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8999496c9aeSAmlan Barua   PetscInt  i,j,k,partial_dim;
9009496c9aeSAmlan Barua   PetscInt  *indx1, *indx2, tempindx, tempindx1;
9019496c9aeSAmlan Barua   PetscInt  N1, n1,NM;
9029496c9aeSAmlan Barua   ptrdiff_t temp;
9039496c9aeSAmlan Barua #endif
9049496c9aeSAmlan Barua 
9053564f093SHong Zhang   PetscFunctionBegin;
906ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9075b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9085b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9090298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9105b3e41ffSAmlan Barua 
911e81bb599SAmlan Barua   if (size==1) {
912b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9136971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9146971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9156971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9166971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
917b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
918e81bb599SAmlan Barua 
9193564f093SHong Zhang   } else {
920e81bb599SAmlan Barua     switch (ndim) {
921e81bb599SAmlan Barua     case 1:
92264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
923*7a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
92426fbe8dcSKarl Rupp 
9254f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9264f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
92764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
92864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
92964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
93164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
93264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
93364657d84SAmlan Barua #else
9346a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
93564657d84SAmlan Barua #endif
9365b3e41ffSAmlan Barua       break;
9375b3e41ffSAmlan Barua     case 2:
938bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
939*7a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
94026fbe8dcSKarl Rupp 
9414f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9424f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
943bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
944bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
947bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
948bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
949bd59e6c6SAmlan Barua #else
950*7a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
95126fbe8dcSKarl Rupp 
9525b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9535b3e41ffSAmlan Barua 
9549496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9559496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9565b3e41ffSAmlan Barua 
957db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
958db4deed7SKarl Rupp       else             NM = dim[1]+1;
9595b3e41ffSAmlan Barua 
9605b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9615b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9625b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9635b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
96426fbe8dcSKarl Rupp 
9655b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9665b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9675b3e41ffSAmlan Barua         }
9685b3e41ffSAmlan Barua       }
9695b3e41ffSAmlan Barua 
9705b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9715b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9725b3e41ffSAmlan Barua 
9735b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9745b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9755b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9765b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
980b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
981bd59e6c6SAmlan Barua #endif
9829496c9aeSAmlan Barua       break;
9835b3e41ffSAmlan Barua     case 3:
984bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
985*7a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
98626fbe8dcSKarl Rupp 
9874f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
9884f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
989bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
990bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua #else
996bd59e6c6SAmlan Barua 
997*7a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
99826fbe8dcSKarl Rupp 
99951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
100051d1eed7SAmlan Barua 
10019496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10029496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
100351d1eed7SAmlan Barua 
1004db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1005db4deed7SKarl Rupp       else             NM = dim[2]+1;
100651d1eed7SAmlan Barua 
100751d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
100851d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
100951d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
101051d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
101151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
101226fbe8dcSKarl Rupp 
101351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
101451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
101551d1eed7SAmlan Barua           }
101651d1eed7SAmlan Barua         }
101751d1eed7SAmlan Barua       }
101851d1eed7SAmlan Barua 
101951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
102051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
102151d1eed7SAmlan Barua 
102251d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
102351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
102451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
102551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1026b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1027b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1028b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1029b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua #endif
10319496c9aeSAmlan Barua       break;
10325b3e41ffSAmlan Barua     default:
1033bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1034*7a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
103526fbe8dcSKarl Rupp 
10364f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10374f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1038bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1039bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1042bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1043bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1044bd59e6c6SAmlan Barua #else
1045ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
104626fbe8dcSKarl Rupp 
1047ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
104826fbe8dcSKarl Rupp 
1049ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
105026fbe8dcSKarl Rupp 
1051ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
105226fbe8dcSKarl Rupp 
1053ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1054ba6e06d0SAmlan Barua 
1055ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1056ba6e06d0SAmlan Barua 
1057ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1058ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1059ba6e06d0SAmlan Barua 
1060db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1061db4deed7SKarl Rupp       else                  NM = 1;
1062ba6e06d0SAmlan Barua 
1063ba6e06d0SAmlan Barua       j = low;
10643564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1065ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1066ba6e06d0SAmlan Barua         indx2[i] = j;
10673564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1068ba6e06d0SAmlan Barua         j++;
1069ba6e06d0SAmlan Barua       }
1070ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1071ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1072ba6e06d0SAmlan Barua 
1073ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1074ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1077b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1078b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1079b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1080b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1081bd59e6c6SAmlan Barua #endif
10829496c9aeSAmlan Barua       break;
10835b3e41ffSAmlan Barua     }
1084e81bb599SAmlan Barua   }
10853564f093SHong Zhang   PetscFunctionReturn(0);
10865b3e41ffSAmlan Barua }
10875b3e41ffSAmlan Barua 
10885b3e41ffSAmlan Barua #undef __FUNCT__
10893c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10903c3a9c75SAmlan Barua /*
10913564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
10923564f093SHong Zhang 
10933c3a9c75SAmlan Barua   Options Database Keys:
10943c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10953c3a9c75SAmlan Barua 
10963c3a9c75SAmlan Barua    Level: intermediate
10973c3a9c75SAmlan Barua 
10983c3a9c75SAmlan Barua */
10998cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1100b2b77a04SHong Zhang {
1101b2b77a04SHong Zhang   PetscErrorCode ierr;
1102ce94432eSBarry Smith   MPI_Comm       comm;
1103b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1104b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1105b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11065274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11075274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1108b2b77a04SHong Zhang   PetscBool      flg;
11094f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
111011902ff2SHong Zhang   PetscMPIInt    size,rank;
11119496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11129496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11139496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11149496c9aeSAmlan Barua   ptrdiff_t temp;
11158ca4c034SAmlan Barua   PetscInt  N1,tot_dim;
11164f48bc67SAmlan Barua #else
11174f48bc67SAmlan Barua   PetscInt n1;
11189496c9aeSAmlan Barua #endif
11199496c9aeSAmlan Barua 
1120b2b77a04SHong Zhang   PetscFunctionBegin;
1121ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1122b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
112311902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1124c92418ccSAmlan Barua 
11251878d478SAmlan Barua   fftw_mpi_init();
112611902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
112711902ff2SHong Zhang   pdim[0] = dim[0];
11288ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11298ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11308ca4c034SAmlan Barua #endif
11313564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11326882372aSHong Zhang     partial_dim *= dim[ctr];
113311902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11348ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1135db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1136db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11378ca4c034SAmlan Barua #endif
11386882372aSHong Zhang   }
11393c3a9c75SAmlan Barua 
1140b2b77a04SHong Zhang   if (size == 1) {
1141e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1142b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1143b2b77a04SHong Zhang     n    = N;
1144e81bb599SAmlan Barua #else
1145e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1146e81bb599SAmlan Barua     n    = tot_dim;
1147e81bb599SAmlan Barua #endif
1148e81bb599SAmlan Barua 
1149b2b77a04SHong Zhang   } else {
1150*7a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1151b2b77a04SHong Zhang     switch (ndim) {
1152b2b77a04SHong Zhang     case 1:
11533c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11543c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1155e5d7f247SAmlan Barua #else
1156*7a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
115726fbe8dcSKarl Rupp 
11586882372aSHong Zhang       n    = (PetscInt)local_n0;
11599496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11609496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1161e5d7f247SAmlan Barua #endif
1162b2b77a04SHong Zhang       break;
1163b2b77a04SHong Zhang     case 2:
11645b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1165*7a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1166b2b77a04SHong Zhang       /*
1167b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11680ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1169b2b77a04SHong Zhang        */
1170b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1171b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11725b3e41ffSAmlan Barua #else
11734f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
117426fbe8dcSKarl Rupp 
11755b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1176c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11775b3e41ffSAmlan Barua #endif
1178b2b77a04SHong Zhang       break;
1179b2b77a04SHong Zhang     case 3:
118051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1181*7a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
118226fbe8dcSKarl Rupp 
11836882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11846882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
118551d1eed7SAmlan Barua #else
11864f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
118726fbe8dcSKarl Rupp 
118851d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1189c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
119051d1eed7SAmlan Barua #endif
1191b2b77a04SHong Zhang       break;
1192b2b77a04SHong Zhang     default:
1193b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1194*7a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
119526fbe8dcSKarl Rupp 
11966882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
11976882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1198b3a17365SAmlan Barua #else
1199b3a17365SAmlan Barua       temp = pdim[ndim-1];
120026fbe8dcSKarl Rupp 
1201b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
120226fbe8dcSKarl Rupp 
12034f8276c3SHong Zhang       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
120426fbe8dcSKarl Rupp 
1205b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1206b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
120726fbe8dcSKarl Rupp 
1208b3a17365SAmlan Barua       pdim[ndim-1] = temp;
120926fbe8dcSKarl Rupp 
1210c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1211b3a17365SAmlan Barua #endif
1212b2b77a04SHong Zhang       break;
1213b2b77a04SHong Zhang     }
1214b2b77a04SHong Zhang   }
1215b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1216b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1217b2b77a04SHong Zhang   fft->data = (void*)fftw;
1218b2b77a04SHong Zhang 
1219b2b77a04SHong Zhang   fft->n            = n;
12200c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1221e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
122226fbe8dcSKarl Rupp 
12235e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
122426fbe8dcSKarl Rupp 
1225b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1226c92418ccSAmlan Barua 
1227b2b77a04SHong Zhang   fftw->p_forward  = 0;
1228b2b77a04SHong Zhang   fftw->p_backward = 0;
1229b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1230b2b77a04SHong Zhang 
1231b2b77a04SHong Zhang   if (size == 1) {
1232b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1233b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1234b2b77a04SHong Zhang   } else {
1235b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1236b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1237b2b77a04SHong Zhang   }
1238b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1239b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12404a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
124126fbe8dcSKarl Rupp 
1242bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1243bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1244bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1245b2b77a04SHong Zhang 
1246b2b77a04SHong Zhang   /* get runtime options */
1247ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12485274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12495274ed1bSHong Zhang   if (flg) {
12505274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12515274ed1bSHong Zhang   }
12524a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1253b2b77a04SHong Zhang   PetscFunctionReturn(0);
1254b2b77a04SHong Zhang }
12553c3a9c75SAmlan Barua 
12563c3a9c75SAmlan Barua 
12573c3a9c75SAmlan Barua 
12583c3a9c75SAmlan Barua 
1259