xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 3564f093082c4e39e657fbd7ab2e39df4067bb32)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua    Input parameter:
30*3564f093SHong Zhang      A - the matrix
3197504071SAmlan Barua      x - the vector on which FDFT will be performed
3297504071SAmlan Barua 
3397504071SAmlan Barua    Output parameter:
3497504071SAmlan Barua      y - vector that stores result of FDFT
3597504071SAmlan Barua */
36b2b77a04SHong Zhang #undef __FUNCT__
37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39b2b77a04SHong Zhang {
40b2b77a04SHong Zhang   PetscErrorCode ierr;
41b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
44b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
45b2b77a04SHong Zhang 
46b2b77a04SHong Zhang   PetscFunctionBegin;
47b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
48b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
49b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
50b2b77a04SHong Zhang     switch (ndim){
51b2b77a04SHong Zhang     case 1:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 2:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     case 3:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     default:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     }
80b2b77a04SHong Zhang     fftw->finarray  = x_array;
81b2b77a04SHong Zhang     fftw->foutarray = y_array;
82b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
83b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
84b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
85b2b77a04SHong Zhang   } else { /* use existing plan */
86b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
87b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
88b2b77a04SHong Zhang     } else {
89b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
90b2b77a04SHong Zhang     }
91b2b77a04SHong Zhang   }
92b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   PetscFunctionReturn(0);
95b2b77a04SHong Zhang }
96b2b77a04SHong Zhang 
9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
9897504071SAmlan Barua    Input parameter:
99*3564f093SHong Zhang      A - the matrix
10097504071SAmlan Barua      x - the vector on which BDFT will be performed
10197504071SAmlan Barua 
10297504071SAmlan Barua    Output parameter:
10397504071SAmlan Barua      y - vector that stores result of BDFT
10497504071SAmlan Barua */
10597504071SAmlan Barua 
106b2b77a04SHong Zhang #undef __FUNCT__
107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
109b2b77a04SHong Zhang {
110b2b77a04SHong Zhang   PetscErrorCode ierr;
111b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
112b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
113b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
114b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
115b2b77a04SHong Zhang 
116b2b77a04SHong Zhang   PetscFunctionBegin;
117b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
118b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
119b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
120b2b77a04SHong Zhang     switch (ndim){
121b2b77a04SHong Zhang     case 1:
12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12458a912c5SAmlan Barua #else
125e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12658a912c5SAmlan Barua #endif
127b2b77a04SHong Zhang       break;
128b2b77a04SHong Zhang     case 2:
12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
130b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13158a912c5SAmlan Barua #else
132e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13358a912c5SAmlan Barua #endif
134b2b77a04SHong Zhang       break;
135b2b77a04SHong Zhang     case 3:
13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
137b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13858a912c5SAmlan Barua #else
139e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
14058a912c5SAmlan Barua #endif
141b2b77a04SHong Zhang       break;
142b2b77a04SHong Zhang     default:
14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
144b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14558a912c5SAmlan Barua #else
146e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
14758a912c5SAmlan Barua #endif
148b2b77a04SHong Zhang       break;
149b2b77a04SHong Zhang     }
150b2b77a04SHong Zhang     fftw->binarray  = x_array;
151b2b77a04SHong Zhang     fftw->boutarray = y_array;
152b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
153b2b77a04SHong Zhang   } else { /* use existing plan */
154b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
155b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
156b2b77a04SHong Zhang     } else {
157b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
158b2b77a04SHong Zhang     }
159b2b77a04SHong Zhang   }
160b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
161b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   PetscFunctionReturn(0);
163b2b77a04SHong Zhang }
164b2b77a04SHong Zhang 
16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
16697504071SAmlan Barua    Input parameter:
167*3564f093SHong Zhang      A - the matrix
16897504071SAmlan Barua      x - the vector on which FDFT will be performed
16997504071SAmlan Barua 
17097504071SAmlan Barua    Output parameter:
17197504071SAmlan Barua    y   - vector that stores result of FDFT
17297504071SAmlan Barua */
173b2b77a04SHong Zhang #undef __FUNCT__
174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
176b2b77a04SHong Zhang {
177b2b77a04SHong Zhang   PetscErrorCode ierr;
178b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
179b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
180b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
181c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
182b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
183b2b77a04SHong Zhang 
184b2b77a04SHong Zhang   PetscFunctionBegin;
185b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
186b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
187b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
188b2b77a04SHong Zhang     switch (ndim){
189b2b77a04SHong Zhang     case 1:
1903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
191b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
192ae0a50aaSHong Zhang #else
193ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1943c3a9c75SAmlan Barua #endif
195b2b77a04SHong Zhang       break;
196b2b77a04SHong Zhang     case 2:
19797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
198b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1993c3a9c75SAmlan Barua #else
2003c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2013c3a9c75SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     case 3:
2043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2063c3a9c75SAmlan Barua #else
20751d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     default:
2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
212c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
2143c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     }
218b2b77a04SHong Zhang     fftw->finarray  = x_array;
219b2b77a04SHong Zhang     fftw->foutarray = y_array;
220b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
221b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
222b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
223b2b77a04SHong Zhang   } else { /* use existing plan */
224b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
225b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
226b2b77a04SHong Zhang     } else {
227b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
228b2b77a04SHong Zhang     }
229b2b77a04SHong Zhang   }
230b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
231b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
232b2b77a04SHong Zhang   PetscFunctionReturn(0);
233b2b77a04SHong Zhang }
234b2b77a04SHong Zhang 
23597504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
23697504071SAmlan Barua    Input parameter:
237*3564f093SHong Zhang      A - the matrix
23897504071SAmlan Barua      x - the vector on which BDFT will be performed
23997504071SAmlan Barua 
24097504071SAmlan Barua    Output parameter:
24197504071SAmlan Barua      y - vector that stores result of BDFT
24297504071SAmlan Barua */
243b2b77a04SHong Zhang #undef __FUNCT__
244b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
245b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
246b2b77a04SHong Zhang {
247b2b77a04SHong Zhang   PetscErrorCode ierr;
248b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
249b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
250b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
251c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
252b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
253b2b77a04SHong Zhang 
254b2b77a04SHong Zhang   PetscFunctionBegin;
255b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
256b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
257b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
258b2b77a04SHong Zhang     switch (ndim){
259b2b77a04SHong Zhang     case 1:
2603c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
261b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
262ae0a50aaSHong Zhang #else
263ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2643c3a9c75SAmlan Barua #endif
265b2b77a04SHong Zhang       break;
266b2b77a04SHong Zhang     case 2:
26797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
268b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2693c3a9c75SAmlan Barua #else
2703c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2713c3a9c75SAmlan Barua #endif
272b2b77a04SHong Zhang       break;
273b2b77a04SHong Zhang     case 3:
2743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
275b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2763c3a9c75SAmlan Barua #else
2773c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2783c3a9c75SAmlan Barua #endif
279b2b77a04SHong Zhang       break;
280b2b77a04SHong Zhang     default:
2813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
282c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2833c3a9c75SAmlan Barua #else
2843c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2853c3a9c75SAmlan Barua #endif
286b2b77a04SHong Zhang       break;
287b2b77a04SHong Zhang     }
288b2b77a04SHong Zhang     fftw->binarray  = x_array;
289b2b77a04SHong Zhang     fftw->boutarray = y_array;
290b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
291b2b77a04SHong Zhang   } else { /* use existing plan */
292b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
293b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
294b2b77a04SHong Zhang     } else {
295b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
296b2b77a04SHong Zhang     }
297b2b77a04SHong Zhang   }
298b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
304b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
305b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
306b2b77a04SHong Zhang {
307b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
308b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
309b2b77a04SHong Zhang   PetscErrorCode ierr;
310b2b77a04SHong Zhang 
311b2b77a04SHong Zhang   PetscFunctionBegin;
312b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
313b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
314b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
315bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
316b2b77a04SHong Zhang   PetscFunctionReturn(0);
317b2b77a04SHong Zhang }
318b2b77a04SHong Zhang 
319c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
320b2b77a04SHong Zhang #undef __FUNCT__
321b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
322b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
323b2b77a04SHong Zhang {
324b2b77a04SHong Zhang   PetscErrorCode  ierr;
325b2b77a04SHong Zhang   PetscScalar     *array;
326b2b77a04SHong Zhang 
327b2b77a04SHong Zhang   PetscFunctionBegin;
328b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
329b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
330b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
331b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
332b2b77a04SHong Zhang   PetscFunctionReturn(0);
333b2b77a04SHong Zhang }
334b2b77a04SHong Zhang 
3354f7415efSAmlan Barua #undef __FUNCT__
3364be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3374be45526SHong Zhang /*@
338b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3394f7415efSAmlan Barua      parallel layout determined by FFTW
3404f7415efSAmlan Barua 
3414f7415efSAmlan Barua    Collective on Mat
3424f7415efSAmlan Barua 
3434f7415efSAmlan Barua    Input Parameter:
344*3564f093SHong Zhang .   A - the matrix
3454f7415efSAmlan Barua 
3464f7415efSAmlan Barua    Output Parameter:
3474f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3484f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
34997504071SAmlan Barua -   bout - (optional) output vector of backward FFTW
3504f7415efSAmlan Barua 
3514f7415efSAmlan Barua   Level: advanced
352*3564f093SHong Zhang 
35397504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
35497504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
35597504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
35697504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
35797504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
35897504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
359e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
360e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
361e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
362e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
363e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
364e0ec536eSAmlan Barua         each processor and returns that.
3654f7415efSAmlan Barua 
3664f7415efSAmlan Barua .seealso: MatCreateFFTW()
3674be45526SHong Zhang @*/
3684be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3694be45526SHong Zhang {
3704be45526SHong Zhang   PetscErrorCode ierr;
371b4c3921fSHong Zhang 
3724be45526SHong Zhang   PetscFunctionBegin;
3734be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3744be45526SHong Zhang   PetscFunctionReturn(0);
3754be45526SHong Zhang }
3764be45526SHong Zhang 
3774f7415efSAmlan Barua EXTERN_C_BEGIN
3784be45526SHong Zhang #undef __FUNCT__
3794be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3804be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3814f7415efSAmlan Barua {
3824f7415efSAmlan Barua   PetscErrorCode ierr;
3834f7415efSAmlan Barua   PetscMPIInt    size,rank;
3844f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3854f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3864f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3879496c9aeSAmlan Barua   PetscInt       N=fft->N;
3884f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3894f7415efSAmlan Barua 
3904f7415efSAmlan Barua   PetscFunctionBegin;
3914f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3924f7415efSAmlan Barua   PetscValidType(A,1);
3934f7415efSAmlan Barua 
3944f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
3954f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
3964f7415efSAmlan Barua   if (size == 1){ /* sequential case */
3974f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3984f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
3994f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4004f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4014f7415efSAmlan Barua #else
4028ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4038ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4048ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4054f7415efSAmlan Barua #endif
40697504071SAmlan Barua   } else { /* parallel cases */
4074f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4089496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4099496c9aeSAmlan Barua     fftw_complex   *data_fout;
4109496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4119496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4129496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4139496c9aeSAmlan Barua #else
4144f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4159496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4169496c9aeSAmlan Barua     ptrdiff_t      temp;
4179496c9aeSAmlan Barua #endif
4189496c9aeSAmlan Barua 
4194f7415efSAmlan Barua     switch (ndim){
4204f7415efSAmlan Barua     case 1:
42157625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
42257625b84SAmlan Barua       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42357625b84SAmlan Barua #else
42457625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42557625b84SAmlan Barua       if (fin) {
42657625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
42757625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
42857625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
42957625b84SAmlan Barua       }
43057625b84SAmlan Barua       if (fout) {
43157625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43257625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
43357625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
43457625b84SAmlan Barua       }
43557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
43657625b84SAmlan Barua       if (bout) {
43757625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43857625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
43957625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44057625b84SAmlan Barua       }
44157625b84SAmlan Barua       break;
44257625b84SAmlan Barua #endif
4434f7415efSAmlan Barua     case 2:
44497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4454f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4464f7415efSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4474f7415efSAmlan Barua       if (fin) {
4484f7415efSAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4494f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4504f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4514f7415efSAmlan Barua       }
4524f7415efSAmlan Barua       if (bout) {
4534f7415efSAmlan Barua         data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4544f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4554f7415efSAmlan Barua         ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4564f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4574f7415efSAmlan Barua       }
45857625b84SAmlan Barua       if (fout) {
45957625b84SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4609496c9aeSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
46257625b84SAmlan Barua       }
4634f7415efSAmlan Barua #else
4644f7415efSAmlan Barua       /* Get local size */
4654f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4664f7415efSAmlan Barua       if (fin) {
4674f7415efSAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4684f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4694f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4704f7415efSAmlan Barua       }
4714f7415efSAmlan Barua       if (fout) {
4724f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4734f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4744f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4754f7415efSAmlan Barua       }
4764f7415efSAmlan Barua       if (bout) {
4774f7415efSAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4784f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4794f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua       }
4814f7415efSAmlan Barua #endif
4824f7415efSAmlan Barua       break;
4834f7415efSAmlan Barua     case 3:
4844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4854f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4864f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4874f7415efSAmlan Barua       if (fin) {
4884f7415efSAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4894f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4904f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4914f7415efSAmlan Barua       }
4924f7415efSAmlan Barua       if (bout) {
4934f7415efSAmlan Barua         data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4944f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4954f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4964f7415efSAmlan Barua       }
497*3564f093SHong Zhang 
49857625b84SAmlan Barua       if (fout) {
49957625b84SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
50057625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50257625b84SAmlan Barua       }
5034f7415efSAmlan Barua #else
5040c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5050c9b18e4SAmlan Barua       if (fin) {
5060c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5070c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5080c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5090c9b18e4SAmlan Barua       }
5100c9b18e4SAmlan Barua       if (fout) {
5110c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5120c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5130c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5140c9b18e4SAmlan Barua       }
5150c9b18e4SAmlan Barua       if (bout) {
5160c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5170c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5180c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5190c9b18e4SAmlan Barua       }
5204f7415efSAmlan Barua #endif
5214f7415efSAmlan Barua       break;
5224f7415efSAmlan Barua     default:
5234f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5244f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5254f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5264f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5274f7415efSAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5284f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5294f7415efSAmlan Barua 
5304f7415efSAmlan Barua       if (fin) {
5314f7415efSAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5324f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5334f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5344f7415efSAmlan Barua       }
5354f7415efSAmlan Barua       if (bout) {
5364f7415efSAmlan Barua         data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5374f7415efSAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5389496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5394f7415efSAmlan Barua       }
540*3564f093SHong Zhang 
54157625b84SAmlan Barua       if (fout) {
54257625b84SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
54357625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
54457625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
54557625b84SAmlan Barua       }
5464f7415efSAmlan Barua #else
5470c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5480c9b18e4SAmlan Barua       if (fin) {
5490c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5500c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5510c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5520c9b18e4SAmlan Barua       }
5530c9b18e4SAmlan Barua       if (fout) {
5540c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5550c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5560c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5570c9b18e4SAmlan Barua       }
5580c9b18e4SAmlan Barua       if (bout) {
5590c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5600c9b18e4SAmlan Barua         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5610c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5620c9b18e4SAmlan Barua       }
5634f7415efSAmlan Barua #endif
5644f7415efSAmlan Barua       break;
5654f7415efSAmlan Barua     }
5664f7415efSAmlan Barua   }
5674f7415efSAmlan Barua   PetscFunctionReturn(0);
5684f7415efSAmlan Barua }
5694f7415efSAmlan Barua EXTERN_C_END
5704f7415efSAmlan Barua 
571c92418ccSAmlan Barua #undef __FUNCT__
57274a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
573*3564f093SHong Zhang /*@
574*3564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
57554efbe56SHong Zhang 
576*3564f093SHong Zhang    Collective on Mat
577*3564f093SHong Zhang 
578*3564f093SHong Zhang    Input Parameters:
579*3564f093SHong Zhang +  A - FFTW matrix
580*3564f093SHong Zhang -  x - the PETSc vector
581*3564f093SHong Zhang 
582*3564f093SHong Zhang    Output Parameters:
583*3564f093SHong Zhang .  y - the FFTW vector
584*3564f093SHong Zhang 
585b2b77a04SHong Zhang   Options Database Keys:
586*3564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
587b2b77a04SHong Zhang 
588b2b77a04SHong Zhang    Level: intermediate
589*3564f093SHong Zhang 
59097504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
59197504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
59297504071SAmlan Barua          zeros. This routine does that job by scattering operation.
59397504071SAmlan Barua 
594*3564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
595*3564f093SHong Zhang @*/
596*3564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
597*3564f093SHong Zhang {
598*3564f093SHong Zhang   PetscErrorCode ierr;
599b2b77a04SHong Zhang 
600*3564f093SHong Zhang   PetscFunctionBegin;
601*3564f093SHong Zhang   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
602*3564f093SHong Zhang   PetscFunctionReturn(0);
603*3564f093SHong Zhang }
6043c3a9c75SAmlan Barua 
6053c3a9c75SAmlan Barua EXTERN_C_BEGIN
6063c3a9c75SAmlan Barua #undef __FUNCT__
60774a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
60874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6093c3a9c75SAmlan Barua {
6103c3a9c75SAmlan Barua   PetscErrorCode ierr;
6113c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6123c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6133c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6149496c9aeSAmlan Barua   PetscInt       N=fft->N;
615b5d11533SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
6169496c9aeSAmlan Barua   PetscInt       low;
6178ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6183c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6199496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6203c3a9c75SAmlan Barua   VecScatter     vecscat;
6213c3a9c75SAmlan Barua   IS             list1,list2;
6229496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6239496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6249496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6259496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
6269496c9aeSAmlan Barua   ptrdiff_t      temp;
6279496c9aeSAmlan Barua #endif
6283c3a9c75SAmlan Barua 
629*3564f093SHong Zhang   PetscFunctionBegin;
630f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
631f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6323c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
6333c3a9c75SAmlan Barua 
634*3564f093SHong Zhang   if (size==1){
6358ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6368ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6379496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6386971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6396971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6406971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6416971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
642b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
643*3564f093SHong Zhang   } else {
6443c3a9c75SAmlan Barua     switch (ndim){
6453c3a9c75SAmlan Barua     case 1:
64664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
64764657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
64864657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
64964657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
65064657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
65164657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65264657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65364657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
65464657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
65564657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
65664657d84SAmlan Barua #else
657e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
65864657d84SAmlan Barua #endif
6593c3a9c75SAmlan Barua       break;
6603c3a9c75SAmlan Barua     case 2:
661bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
662bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
663bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
664bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
665bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
666bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
667bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
668bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
669bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
670bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
671bd59e6c6SAmlan Barua #else
6723c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6733c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6749496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
6759496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
6763c3a9c75SAmlan Barua 
677*3564f093SHong Zhang       if (dim[1]%2==0){
6783c3a9c75SAmlan Barua         NM = dim[1]+2;
679*3564f093SHong Zhang       } else {
6803c3a9c75SAmlan Barua         NM = dim[1]+1;
681*3564f093SHong Zhang       }
6823c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
6833c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
6843c3a9c75SAmlan Barua            tempindx = i*dim[1] + j;
6853c3a9c75SAmlan Barua            tempindx1 = i*NM + j;
6865b3e41ffSAmlan Barua            indx1[tempindx]=local_0_start*dim[1]+tempindx;
6873c3a9c75SAmlan Barua            indx2[tempindx]=low+tempindx1;
6883c3a9c75SAmlan Barua          }
6893c3a9c75SAmlan Barua       }
6903c3a9c75SAmlan Barua 
6913c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
6923c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
6933c3a9c75SAmlan Barua 
694f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
695f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
697f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
698b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
699b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
700b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
701b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
702bd59e6c6SAmlan Barua #endif
7039496c9aeSAmlan Barua       break;
7043c3a9c75SAmlan Barua 
7053c3a9c75SAmlan Barua     case 3:
706bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
707bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
708bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
709bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
710bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
711bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
712bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
713bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
714bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
715bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
716bd59e6c6SAmlan Barua #else
71751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
71851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
71951d1eed7SAmlan Barua 
7209496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7219496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
72251d1eed7SAmlan Barua 
72351d1eed7SAmlan Barua       if (dim[2]%2==0)
72451d1eed7SAmlan Barua         NM = dim[2]+2;
72551d1eed7SAmlan Barua       else
72651d1eed7SAmlan Barua         NM = dim[2]+1;
72751d1eed7SAmlan Barua 
72851d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
72951d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
73051d1eed7SAmlan Barua           for (k=0;k<dim[2];k++){
73151d1eed7SAmlan Barua             tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
73251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
73351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
73451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
73551d1eed7SAmlan Barua           }
73651d1eed7SAmlan Barua         }
73751d1eed7SAmlan Barua       }
73851d1eed7SAmlan Barua 
73951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
74051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
74151d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
74251d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74351d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74451d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
745b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
746b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
747b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
748b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
749bd59e6c6SAmlan Barua #endif
7509496c9aeSAmlan Barua       break;
7513c3a9c75SAmlan Barua 
7523c3a9c75SAmlan Barua     default:
753bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
754bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
755bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
756bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
757bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
758bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
759bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
761bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
762bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
763bd59e6c6SAmlan Barua #else
764e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
765e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
766e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
767e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
768e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
769e5d7f247SAmlan Barua 
770e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
771e5d7f247SAmlan Barua 
772e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
773e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
774e5d7f247SAmlan Barua 
775e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
776ba6e06d0SAmlan Barua         NM = 2;
777e5d7f247SAmlan Barua       else
778ba6e06d0SAmlan Barua         NM = 1;
779e5d7f247SAmlan Barua 
7806971673cSAmlan Barua       j = low;
781*3564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++){
7826971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
7836971673cSAmlan Barua         indx2[i] = j;
784*3564f093SHong Zhang         if (k%dim[ndim-1]==0){ j+=NM;}
7856971673cSAmlan Barua         j++;
7866971673cSAmlan Barua       }
7876971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7886971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7896971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
7906971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7916971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7926971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
793b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
794b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
795b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
796b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
797bd59e6c6SAmlan Barua #endif
7989496c9aeSAmlan Barua       break;
7993c3a9c75SAmlan Barua     }
800e81bb599SAmlan Barua   }
801*3564f093SHong Zhang   PetscFunctionReturn(0);
8023c3a9c75SAmlan Barua }
8033c3a9c75SAmlan Barua EXTERN_C_END
8043c3a9c75SAmlan Barua 
8053c3a9c75SAmlan Barua #undef __FUNCT__
80674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
807*3564f093SHong Zhang /*@
808*3564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
809*3564f093SHong Zhang 
810*3564f093SHong Zhang    Collective on Mat
811*3564f093SHong Zhang 
812*3564f093SHong Zhang     Input Parameters:
813*3564f093SHong Zhang +   A - FFTW matrix
814*3564f093SHong Zhang -   x - FFTW vector
815*3564f093SHong Zhang 
816*3564f093SHong Zhang    Output Parameters:
817*3564f093SHong Zhang .  y - PETSc vector
818*3564f093SHong Zhang 
819*3564f093SHong Zhang    Level: intermediate
820*3564f093SHong Zhang 
821*3564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
822*3564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
823*3564f093SHong Zhang 
824*3564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
825*3564f093SHong Zhang @*/
82674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8273c3a9c75SAmlan Barua {
8283c3a9c75SAmlan Barua   PetscErrorCode ierr;
8293c3a9c75SAmlan Barua   PetscFunctionBegin;
83074a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8313c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8323c3a9c75SAmlan Barua }
83354efbe56SHong Zhang 
8343c3a9c75SAmlan Barua EXTERN_C_BEGIN
8353c3a9c75SAmlan Barua #undef __FUNCT__
83674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
83774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8385b3e41ffSAmlan Barua {
8395b3e41ffSAmlan Barua   PetscErrorCode ierr;
8405b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8415b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8425b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8439496c9aeSAmlan Barua   PetscInt       N=fft->N;
844b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
8459496c9aeSAmlan Barua   PetscInt       low;
8469496c9aeSAmlan Barua   PetscInt       rank,size;
8475b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8489496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8495b3e41ffSAmlan Barua   VecScatter     vecscat;
8505b3e41ffSAmlan Barua   IS             list1,list2;
8519496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8529496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8539496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8549496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8559496c9aeSAmlan Barua   ptrdiff_t      temp;
8569496c9aeSAmlan Barua #endif
8579496c9aeSAmlan Barua 
858*3564f093SHong Zhang   PetscFunctionBegin;
8595b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8605b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
861b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
8625b3e41ffSAmlan Barua 
863e81bb599SAmlan Barua   if (size==1){
864b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
8656971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8666971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8676971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8686971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
869b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
870e81bb599SAmlan Barua 
871*3564f093SHong Zhang   } else{
872e81bb599SAmlan Barua     switch (ndim){
873e81bb599SAmlan Barua     case 1:
87464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87564657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
8769496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
8779496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
87864657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
87964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
88264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
88364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
88464657d84SAmlan Barua #else
8856a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
88664657d84SAmlan Barua #endif
8875b3e41ffSAmlan Barua       break;
8885b3e41ffSAmlan Barua     case 2:
889bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
890bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
891bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
892bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
893bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
894bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
897bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
898bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
899bd59e6c6SAmlan Barua #else
9005b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9015b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9025b3e41ffSAmlan Barua 
9039496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9049496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9055b3e41ffSAmlan Barua 
9065b3e41ffSAmlan Barua       if (dim[1]%2==0)
9075b3e41ffSAmlan Barua         NM = dim[1]+2;
9085b3e41ffSAmlan Barua       else
9095b3e41ffSAmlan Barua         NM = dim[1]+1;
9105b3e41ffSAmlan Barua 
9115b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
9125b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
9135b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9145b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
9155b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9165b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9175b3e41ffSAmlan Barua         }
9185b3e41ffSAmlan Barua       }
9195b3e41ffSAmlan Barua 
9205b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9215b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9225b3e41ffSAmlan Barua 
9235b3e41ffSAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9245b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9255b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9265b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
927b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
928b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
929b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
930b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
931bd59e6c6SAmlan Barua #endif
9329496c9aeSAmlan Barua       break;
9335b3e41ffSAmlan Barua     case 3:
934bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
935bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
936bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
937bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
938bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
939bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
942bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
943bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
944bd59e6c6SAmlan Barua #else
945bd59e6c6SAmlan Barua 
94651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
94751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
94851d1eed7SAmlan Barua 
9499496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9509496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
95151d1eed7SAmlan Barua 
95251d1eed7SAmlan Barua       if (dim[2]%2==0)
95351d1eed7SAmlan Barua         NM = dim[2]+2;
95451d1eed7SAmlan Barua       else
95551d1eed7SAmlan Barua         NM = dim[2]+1;
95651d1eed7SAmlan Barua 
95751d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
95851d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
95951d1eed7SAmlan Barua           for (k=0;k<dim[2];k++){
96051d1eed7SAmlan Barua             tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
96151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
96251d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
96351d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
96451d1eed7SAmlan Barua           }
96551d1eed7SAmlan Barua         }
96651d1eed7SAmlan Barua       }
96751d1eed7SAmlan Barua 
96851d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
96951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
97051d1eed7SAmlan Barua 
97151d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
97251d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97351d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97451d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
975b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
976b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
977b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
978b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
979bd59e6c6SAmlan Barua #endif
9809496c9aeSAmlan Barua       break;
9815b3e41ffSAmlan Barua     default:
982bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
983bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
984bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
985bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
986bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
987bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
990bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua #else
993ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
994ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
995ba6e06d0SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
996ba6e06d0SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
997ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
998ba6e06d0SAmlan Barua 
999ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1000ba6e06d0SAmlan Barua 
1001ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1002ba6e06d0SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1003ba6e06d0SAmlan Barua 
1004ba6e06d0SAmlan Barua       if (dim[ndim-1]%2==0)
1005ba6e06d0SAmlan Barua         NM = 2;
1006ba6e06d0SAmlan Barua       else
1007ba6e06d0SAmlan Barua         NM = 1;
1008ba6e06d0SAmlan Barua 
1009ba6e06d0SAmlan Barua       j = low;
1010*3564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++){
1011ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1012ba6e06d0SAmlan Barua         indx2[i] = j;
1013*3564f093SHong Zhang         if (k%dim[ndim-1]==0)j+=NM;
1014ba6e06d0SAmlan Barua         j++;
1015ba6e06d0SAmlan Barua       }
1016ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1017ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1018ba6e06d0SAmlan Barua 
1019ba6e06d0SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1020ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1023b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1024b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1025b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1026b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua #endif
10289496c9aeSAmlan Barua       break;
10295b3e41ffSAmlan Barua     }
1030e81bb599SAmlan Barua   }
1031*3564f093SHong Zhang   PetscFunctionReturn(0);
10325b3e41ffSAmlan Barua }
10335b3e41ffSAmlan Barua EXTERN_C_END
10345b3e41ffSAmlan Barua 
10355b3e41ffSAmlan Barua EXTERN_C_BEGIN
10365b3e41ffSAmlan Barua #undef __FUNCT__
10373c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10383c3a9c75SAmlan Barua /*
1039*3564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1040*3564f093SHong Zhang 
10413c3a9c75SAmlan Barua   Options Database Keys:
10423c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10433c3a9c75SAmlan Barua 
10443c3a9c75SAmlan Barua    Level: intermediate
10453c3a9c75SAmlan Barua 
10463c3a9c75SAmlan Barua */
1047b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1048b2b77a04SHong Zhang {
1049b2b77a04SHong Zhang   PetscErrorCode ierr;
1050b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1051b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1052b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1053b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1054b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1055b2b77a04SHong Zhang   PetscBool      flg;
10564f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
105711902ff2SHong Zhang   PetscMPIInt    size,rank;
10589496c9aeSAmlan Barua   ptrdiff_t      *pdim;
10599496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10609496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10619496c9aeSAmlan Barua    ptrdiff_t     temp;
10628ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
10634f48bc67SAmlan Barua #else
10644f48bc67SAmlan Barua    PetscInt      n1;
10659496c9aeSAmlan Barua #endif
10669496c9aeSAmlan Barua 
1067b2b77a04SHong Zhang   PetscFunctionBegin;
1068b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
106911902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1070c92418ccSAmlan Barua 
10711878d478SAmlan Barua   fftw_mpi_init();
107211902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
107311902ff2SHong Zhang   pdim[0] = dim[0];
10748ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10758ca4c034SAmlan Barua   tot_dim = 2*dim[0];
10768ca4c034SAmlan Barua #endif
1077*3564f093SHong Zhang   for (ctr=1;ctr<ndim;ctr++){
10786882372aSHong Zhang     partial_dim *= dim[ctr];
107911902ff2SHong Zhang     pdim[ctr] = dim[ctr];
10808ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10818ca4c034SAmlan Barua     if (ctr==ndim-1)
10828ca4c034SAmlan Barua       tot_dim *= (dim[ctr]/2+1);
10838ca4c034SAmlan Barua     else
10848ca4c034SAmlan Barua       tot_dim *= dim[ctr];
10858ca4c034SAmlan Barua #endif
10866882372aSHong Zhang   }
10873c3a9c75SAmlan Barua 
1088b2b77a04SHong Zhang   if (size == 1) {
1089e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1090b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1091b2b77a04SHong Zhang     n = N;
1092e81bb599SAmlan Barua #else
1093e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1094e81bb599SAmlan Barua     n = tot_dim;
1095e81bb599SAmlan Barua #endif
1096e81bb599SAmlan Barua 
1097b2b77a04SHong Zhang   } else {
10981abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1099b2b77a04SHong Zhang     switch (ndim){
1100b2b77a04SHong Zhang     case 1:
11013c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11023c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1103e5d7f247SAmlan Barua #else
11049496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
11056882372aSHong Zhang       n = (PetscInt)local_n0;
11069496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
11079496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1108e5d7f247SAmlan Barua #endif
1109b2b77a04SHong Zhang       break;
1110b2b77a04SHong Zhang     case 2:
11115b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1112b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1113b2b77a04SHong Zhang       /*
1114b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1115b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1116b2b77a04SHong Zhang        */
1117b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1118b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11195b3e41ffSAmlan Barua #else
11205b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
11215b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1122c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11235b3e41ffSAmlan Barua #endif
1124b2b77a04SHong Zhang       break;
1125b2b77a04SHong Zhang     case 3:
112651d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
112751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
11286882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
11296882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
113051d1eed7SAmlan Barua #else
113151d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
113251d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1133c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
113451d1eed7SAmlan Barua #endif
1135b2b77a04SHong Zhang       break;
1136b2b77a04SHong Zhang     default:
1137b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
113811902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
11396882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
11406882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1141b3a17365SAmlan Barua #else
1142b3a17365SAmlan Barua       temp = pdim[ndim-1];
1143b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1144b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1145b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1146b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1147b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1148c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1149b3a17365SAmlan Barua #endif
1150b2b77a04SHong Zhang       break;
1151b2b77a04SHong Zhang     }
1152b2b77a04SHong Zhang   }
1153b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1154b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1155b2b77a04SHong Zhang   fft->data = (void*)fftw;
1156b2b77a04SHong Zhang 
1157b2b77a04SHong Zhang   fft->n            = n;
1158c92418ccSAmlan Barua   fftw->ndim_fftw   = (ptrdiff_t)ndim; // This is dimension of fft
1159e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1160c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1161b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1162c92418ccSAmlan Barua 
1163b2b77a04SHong Zhang   fftw->p_forward  = 0;
1164b2b77a04SHong Zhang   fftw->p_backward = 0;
1165b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1166b2b77a04SHong Zhang 
1167b2b77a04SHong Zhang   if (size == 1){
1168b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1169b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1170b2b77a04SHong Zhang   } else {
1171b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1172b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1173b2b77a04SHong Zhang   }
1174b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
1175b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
11764be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
117774a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
117874a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1179b2b77a04SHong Zhang 
1180b2b77a04SHong Zhang   /* get runtime options */
1181b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1182b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1183b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1184b2b77a04SHong Zhang   PetscOptionsEnd();
1185b2b77a04SHong Zhang   PetscFunctionReturn(0);
1186b2b77a04SHong Zhang }
1187b2b77a04SHong Zhang EXTERN_C_END
11883c3a9c75SAmlan Barua 
11893c3a9c75SAmlan Barua 
11903c3a9c75SAmlan Barua 
11913c3a9c75SAmlan Barua 
1192