xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b5d11533a7f56b3c92ca15a84106f2d35851d12f)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua 
3097504071SAmlan Barua    Input parameter:
3197504071SAmlan Barua    mat - the matrix
3297504071SAmlan Barua    x   - the vector on which FDFT will be performed
3397504071SAmlan Barua 
3497504071SAmlan Barua    Output parameter:
3597504071SAmlan Barua    y   - vector that stores result of FDFT
3697504071SAmlan Barua 
3797504071SAmlan Barua */
38b2b77a04SHong Zhang #undef __FUNCT__
39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
41b2b77a04SHong Zhang {
42b2b77a04SHong Zhang   PetscErrorCode ierr;
43b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
44b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
45b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
46b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
47b2b77a04SHong Zhang 
48b2b77a04SHong Zhang   PetscFunctionBegin;
49b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
52b2b77a04SHong Zhang     switch (ndim){
53b2b77a04SHong Zhang     case 1:
5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5658a912c5SAmlan Barua #else
5758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5858a912c5SAmlan Barua #endif
59b2b77a04SHong Zhang       break;
60b2b77a04SHong Zhang     case 2:
6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
62b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6358a912c5SAmlan Barua #else
6458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6558a912c5SAmlan Barua #endif
66b2b77a04SHong Zhang       break;
67b2b77a04SHong Zhang     case 3:
6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
69b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7058a912c5SAmlan Barua #else
7158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7258a912c5SAmlan Barua #endif
73b2b77a04SHong Zhang       break;
74b2b77a04SHong Zhang     default:
7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
76b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7758a912c5SAmlan Barua #else
7858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7958a912c5SAmlan Barua #endif
80b2b77a04SHong Zhang       break;
81b2b77a04SHong Zhang     }
82b2b77a04SHong Zhang     fftw->finarray  = x_array;
83b2b77a04SHong Zhang     fftw->foutarray = y_array;
84b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
85b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
86b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
87b2b77a04SHong Zhang   } else { /* use existing plan */
88b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
89b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
90b2b77a04SHong Zhang     } else {
91b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
92b2b77a04SHong Zhang     }
93b2b77a04SHong Zhang   }
94b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
95b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
96b2b77a04SHong Zhang   PetscFunctionReturn(0);
97b2b77a04SHong Zhang }
98b2b77a04SHong Zhang 
9997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
10097504071SAmlan Barua 
10197504071SAmlan Barua    Input parameter:
10297504071SAmlan Barua    mat - the matrix
10397504071SAmlan Barua    x   - the vector on which BDFT will be performed
10497504071SAmlan Barua 
10597504071SAmlan Barua    Output parameter:
10697504071SAmlan Barua    y   - vector that stores result of BDFT
10797504071SAmlan Barua 
10897504071SAmlan Barua */
10997504071SAmlan Barua 
110b2b77a04SHong Zhang #undef __FUNCT__
111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
113b2b77a04SHong Zhang {
114b2b77a04SHong Zhang   PetscErrorCode ierr;
115b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
116b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
117b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
118b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
119b2b77a04SHong Zhang 
120b2b77a04SHong Zhang   PetscFunctionBegin;
121b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
122b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
123b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
124b2b77a04SHong Zhang     switch (ndim){
125b2b77a04SHong Zhang     case 1:
12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
127b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12858a912c5SAmlan Barua #else
129e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13058a912c5SAmlan Barua #endif
131b2b77a04SHong Zhang       break;
132b2b77a04SHong Zhang     case 2:
13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
134b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13558a912c5SAmlan Barua #else
136e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13758a912c5SAmlan Barua #endif
138b2b77a04SHong Zhang       break;
139b2b77a04SHong Zhang     case 3:
14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
141b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14258a912c5SAmlan Barua #else
143e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
14458a912c5SAmlan Barua #endif
145b2b77a04SHong Zhang       break;
146b2b77a04SHong Zhang     default:
14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14958a912c5SAmlan Barua #else
150e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
15158a912c5SAmlan Barua #endif
152b2b77a04SHong Zhang       break;
153b2b77a04SHong Zhang     }
154b2b77a04SHong Zhang     fftw->binarray  = x_array;
155b2b77a04SHong Zhang     fftw->boutarray = y_array;
156b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
157b2b77a04SHong Zhang   } else { /* use existing plan */
158b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
159b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
160b2b77a04SHong Zhang     } else {
161b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
162b2b77a04SHong Zhang     }
163b2b77a04SHong Zhang   }
164b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
165b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
166b2b77a04SHong Zhang   PetscFunctionReturn(0);
167b2b77a04SHong Zhang }
168b2b77a04SHong Zhang 
16997504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
17097504071SAmlan Barua 
17197504071SAmlan Barua    Input parameter:
17297504071SAmlan Barua    mat - the matrix
17397504071SAmlan Barua    x   - the vector on which FDFT will be performed
17497504071SAmlan Barua 
17597504071SAmlan Barua    Output parameter:
17697504071SAmlan Barua    y   - vector that stores result of FDFT
17797504071SAmlan Barua 
17897504071SAmlan Barua */
17997504071SAmlan Barua 
180b2b77a04SHong Zhang #undef __FUNCT__
181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
183b2b77a04SHong Zhang {
184b2b77a04SHong Zhang   PetscErrorCode ierr;
185b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
186b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
187b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
188c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
189b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
190b2b77a04SHong Zhang 
191b2b77a04SHong Zhang   PetscFunctionBegin;
192b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
193b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
194b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
195b2b77a04SHong Zhang     switch (ndim){
196b2b77a04SHong Zhang     case 1:
1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
198b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
199ae0a50aaSHong Zhang #else
200ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2013c3a9c75SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     case 2:
20497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2063c3a9c75SAmlan Barua #else
2073c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     case 3:
2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
212b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
21451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     default:
2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
219c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2203c3a9c75SAmlan Barua #else
2213c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2223c3a9c75SAmlan Barua #endif
223b2b77a04SHong Zhang       break;
224b2b77a04SHong Zhang     }
225b2b77a04SHong Zhang     fftw->finarray  = x_array;
226b2b77a04SHong Zhang     fftw->foutarray = y_array;
227b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
228b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
229b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
230b2b77a04SHong Zhang   } else { /* use existing plan */
231b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
232b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
233b2b77a04SHong Zhang     } else {
234b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
235b2b77a04SHong Zhang     }
236b2b77a04SHong Zhang   }
237b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
238b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   PetscFunctionReturn(0);
240b2b77a04SHong Zhang }
241b2b77a04SHong Zhang 
24297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
24397504071SAmlan Barua 
24497504071SAmlan Barua    Input parameter:
24597504071SAmlan Barua    mat - the matrix
24697504071SAmlan Barua    x   - the vector on which BDFT will be performed
24797504071SAmlan Barua 
24897504071SAmlan Barua    Output parameter:
24997504071SAmlan Barua    y   - vector that stores result of BDFT
25097504071SAmlan Barua 
25197504071SAmlan Barua */
252b2b77a04SHong Zhang #undef __FUNCT__
253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
255b2b77a04SHong Zhang {
256b2b77a04SHong Zhang   PetscErrorCode ierr;
257b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
258b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
259b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
260c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
261b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
262b2b77a04SHong Zhang 
263b2b77a04SHong Zhang   PetscFunctionBegin;
264b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
265b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
266b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
267b2b77a04SHong Zhang     switch (ndim){
268b2b77a04SHong Zhang     case 1:
2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
270b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
271ae0a50aaSHong Zhang #else
272ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     case 2:
27697504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
277b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2783c3a9c75SAmlan Barua #else
2793c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2803c3a9c75SAmlan Barua #endif
281b2b77a04SHong Zhang       break;
282b2b77a04SHong Zhang     case 3:
2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
284b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2853c3a9c75SAmlan Barua #else
2863c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2873c3a9c75SAmlan Barua #endif
288b2b77a04SHong Zhang       break;
289b2b77a04SHong Zhang     default:
2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
291c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2923c3a9c75SAmlan Barua #else
2933c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2943c3a9c75SAmlan Barua #endif
295b2b77a04SHong Zhang       break;
296b2b77a04SHong Zhang     }
297b2b77a04SHong Zhang     fftw->binarray  = x_array;
298b2b77a04SHong Zhang     fftw->boutarray = y_array;
299b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
300b2b77a04SHong Zhang   } else { /* use existing plan */
301b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
302b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
303b2b77a04SHong Zhang     } else {
304b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305b2b77a04SHong Zhang     }
306b2b77a04SHong Zhang   }
307b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   PetscFunctionReturn(0);
310b2b77a04SHong Zhang }
311b2b77a04SHong Zhang 
312b2b77a04SHong Zhang #undef __FUNCT__
313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
315b2b77a04SHong Zhang {
316b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
317b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318b2b77a04SHong Zhang   PetscErrorCode ierr;
319b2b77a04SHong Zhang 
320b2b77a04SHong Zhang   PetscFunctionBegin;
321b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
322b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
323b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
324bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
325b2b77a04SHong Zhang   PetscFunctionReturn(0);
326b2b77a04SHong Zhang }
327b2b77a04SHong Zhang 
328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
329b2b77a04SHong Zhang #undef __FUNCT__
330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
332b2b77a04SHong Zhang {
333b2b77a04SHong Zhang   PetscErrorCode  ierr;
334b2b77a04SHong Zhang   PetscScalar     *array;
335b2b77a04SHong Zhang 
336b2b77a04SHong Zhang   PetscFunctionBegin;
337b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
338b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
339b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
340b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
341b2b77a04SHong Zhang   PetscFunctionReturn(0);
342b2b77a04SHong Zhang }
343b2b77a04SHong Zhang 
3443c3a9c75SAmlan Barua 
3454f7415efSAmlan Barua #undef __FUNCT__
3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3474be45526SHong Zhang /*@
348b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3494f7415efSAmlan Barua      parallel layout determined by FFTW
3504f7415efSAmlan Barua 
3514f7415efSAmlan Barua    Collective on Mat
3524f7415efSAmlan Barua 
3534f7415efSAmlan Barua    Input Parameter:
3544f7415efSAmlan Barua .  mat - the matrix
3554f7415efSAmlan Barua 
3564f7415efSAmlan Barua    Output Parameter:
3574f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3584f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
35997504071SAmlan Barua -   bout - (optional) output vector of backward FFTW
3604f7415efSAmlan Barua 
3614f7415efSAmlan Barua   Level: advanced
36297504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
36397504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
36497504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
36597504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
36697504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
36797504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
36897504071SAmlan Barua         last dimension from n to n/2+1 while invoking this routine.
3694f7415efSAmlan Barua 
3704f7415efSAmlan Barua .seealso: MatCreateFFTW()
3714be45526SHong Zhang @*/
3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3734be45526SHong Zhang {
3744be45526SHong Zhang   PetscErrorCode ierr;
375b4c3921fSHong Zhang 
3764be45526SHong Zhang   PetscFunctionBegin;
3774be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3784be45526SHong Zhang   PetscFunctionReturn(0);
3794be45526SHong Zhang }
3804be45526SHong Zhang 
3814f7415efSAmlan Barua EXTERN_C_BEGIN
3824be45526SHong Zhang #undef __FUNCT__
3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3844be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3854f7415efSAmlan Barua {
3864f7415efSAmlan Barua   PetscErrorCode ierr;
3874f7415efSAmlan Barua   PetscMPIInt    size,rank;
3884f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3894f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3904f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3919496c9aeSAmlan Barua   PetscInt       N=fft->N;
3924f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3934f7415efSAmlan Barua 
3944f7415efSAmlan Barua   PetscFunctionBegin;
3954f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3964f7415efSAmlan Barua   PetscValidType(A,1);
3974f7415efSAmlan Barua 
3984f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
3994f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4004f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4024f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4034f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4044f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4054f7415efSAmlan Barua #else
4068ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4078ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4088ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4094f7415efSAmlan Barua #endif
41097504071SAmlan Barua   } else { /* parallel cases */
4114f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4129496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4139496c9aeSAmlan Barua     fftw_complex   *data_fout;
4149496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4169496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4179496c9aeSAmlan Barua #else
4184f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4199496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4209496c9aeSAmlan Barua     ptrdiff_t      temp;
4219496c9aeSAmlan Barua #endif
4229496c9aeSAmlan Barua 
4234f7415efSAmlan Barua     switch (ndim){
4244f7415efSAmlan Barua           case 1:
42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
42657625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
42757625b84SAmlan Barua #else
42857625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
42957625b84SAmlan Barua                  if (fin) {
43057625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43157625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
43257625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43357625b84SAmlan Barua                          }
43457625b84SAmlan Barua                  if (fout) {
43557625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43657625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
43757625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
43857625b84SAmlan Barua                          }
43957625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44057625b84SAmlan Barua                  if (bout) {
44157625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
44257625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
44357625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44457625b84SAmlan Barua                          }
44557625b84SAmlan Barua           break;
44657625b84SAmlan Barua #endif
4474f7415efSAmlan Barua           case 2:
44897504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4494f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4504f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4514f7415efSAmlan Barua                  if (fin) {
4524f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4534f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4544f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4554f7415efSAmlan Barua                           }
4564f7415efSAmlan Barua                  if (bout) {
4574f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4584f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4594f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4604f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4614f7415efSAmlan Barua                           }
462c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
46357625b84SAmlan Barua                  if (fout) {
46457625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4659496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46657625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
46757625b84SAmlan Barua                            }
4684f7415efSAmlan Barua #else
4694f7415efSAmlan Barua       /* Get local size */
4704f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4714f7415efSAmlan Barua                  if (fin) {
4724f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4734f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4744f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4754f7415efSAmlan Barua                           }
4764f7415efSAmlan Barua                  if (fout) {
4774f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4784f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4794f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua                            }
4814f7415efSAmlan Barua                  if (bout) {
4824f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4834f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4844f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4854f7415efSAmlan Barua                           }
4864f7415efSAmlan Barua #endif
4874f7415efSAmlan Barua           break;
4884f7415efSAmlan Barua           case 3:
4894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4904f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4914f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4924f7415efSAmlan Barua                  if (fin) {
4934f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4944f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4954f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4964f7415efSAmlan Barua                          }
4974f7415efSAmlan Barua                  if (bout) {
4984f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4994f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5004f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5014f7415efSAmlan Barua                           }
502c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
50357625b84SAmlan Barua                  if (fout) {
50457625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
50557625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50657625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50757625b84SAmlan Barua                           }
5084f7415efSAmlan Barua #else
5090c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5100c9b18e4SAmlan Barua                  if (fin) {
5110c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5120c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5130c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5140c9b18e4SAmlan Barua                          }
5150c9b18e4SAmlan Barua                  if (fout) {
5160c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5170c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5180c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5190c9b18e4SAmlan Barua                          }
5200c9b18e4SAmlan Barua                  if (bout) {
5210c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5220c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5230c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5240c9b18e4SAmlan Barua                          }
5254f7415efSAmlan Barua #endif
5264f7415efSAmlan Barua           break;
5274f7415efSAmlan Barua           default:
5284f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5294f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5304f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5314f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5324f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5334f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5344f7415efSAmlan Barua 
5354f7415efSAmlan Barua                  if (fin) {
5364f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5374f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5384f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5394f7415efSAmlan Barua                         }
5404f7415efSAmlan Barua                  if (bout) {
5414f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5424f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5439496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5444f7415efSAmlan Barua                         }
545c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
546c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
547c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
54857625b84SAmlan Barua                  if (fout) {
54957625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
55057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55157625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55257625b84SAmlan Barua                         }
5534f7415efSAmlan Barua #else
5540c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5550c9b18e4SAmlan Barua                 if (fin) {
5560c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5570c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5580c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5590c9b18e4SAmlan Barua                        }
5600c9b18e4SAmlan Barua                 if (fout) {
5610c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5620c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5630c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5640c9b18e4SAmlan Barua                        }
5650c9b18e4SAmlan Barua                 if (bout) {
5660c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5670c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5680c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5690c9b18e4SAmlan Barua                        }
5704f7415efSAmlan Barua #endif
5714f7415efSAmlan Barua             break;
5724f7415efSAmlan Barua           }
5734f7415efSAmlan Barua   }
5744f7415efSAmlan Barua   PetscFunctionReturn(0);
5754f7415efSAmlan Barua }
5764f7415efSAmlan Barua EXTERN_C_END
5774f7415efSAmlan Barua 
578c92418ccSAmlan Barua #undef __FUNCT__
57974a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
58074a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
5813c3a9c75SAmlan Barua {
5823c3a9c75SAmlan Barua   PetscErrorCode ierr;
5833c3a9c75SAmlan Barua   PetscFunctionBegin;
58474a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
5853c3a9c75SAmlan Barua   PetscFunctionReturn(0);
5863c3a9c75SAmlan Barua }
58754efbe56SHong Zhang 
588b2b77a04SHong Zhang /*
58997504071SAmlan Barua       VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block.
5903c3a9c75SAmlan Barua   Input A, x, y
5913c3a9c75SAmlan Barua   A - FFTW matrix
59254dd5118SAmlan Barua   x - user data
593b2b77a04SHong Zhang   Options Database Keys:
594b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
595b2b77a04SHong Zhang 
596b2b77a04SHong Zhang    Level: intermediate
59797504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
59897504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
59997504071SAmlan Barua          zeros. This routine does that job by scattering operation.
60097504071SAmlan Barua 
601b2b77a04SHong Zhang 
602b2b77a04SHong Zhang */
6033c3a9c75SAmlan Barua 
6043c3a9c75SAmlan Barua EXTERN_C_BEGIN
6053c3a9c75SAmlan Barua #undef __FUNCT__
60674a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
60774a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6083c3a9c75SAmlan Barua {
6093c3a9c75SAmlan Barua   PetscErrorCode ierr;
6103c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6113c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6123c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6139496c9aeSAmlan Barua   PetscInt       N=fft->N;
614*b5d11533SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
6159496c9aeSAmlan Barua   PetscInt       low;
6168ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6173c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6189496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6193c3a9c75SAmlan Barua   VecScatter     vecscat;
6203c3a9c75SAmlan Barua   IS             list1,list2;
6219496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6229496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6239496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6249496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
6259496c9aeSAmlan Barua   ptrdiff_t      temp;
6269496c9aeSAmlan Barua #endif
6273c3a9c75SAmlan Barua 
628f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
629f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6303c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
6313c3a9c75SAmlan Barua 
632e81bb599SAmlan Barua   if (size==1)
633e81bb599SAmlan Barua     {
6348ca4c034SAmlan Barua           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6358ca4c034SAmlan Barua           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6369496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6376971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6386971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6396971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6406971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
641b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
642e81bb599SAmlan Barua     }
643e81bb599SAmlan Barua 
644e81bb599SAmlan Barua  else{
645e81bb599SAmlan Barua 
6463c3a9c75SAmlan Barua  switch (ndim){
6473c3a9c75SAmlan Barua  case 1:
64864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
64964657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
65064657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
65164657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
65264657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
65364657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65464657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65564657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
65664657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
65764657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
65864657d84SAmlan Barua #else
659e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
66064657d84SAmlan Barua #endif
6613c3a9c75SAmlan Barua  break;
6623c3a9c75SAmlan Barua  case 2:
663bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
664bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
665bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
666bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
667bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
668bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
669bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
670bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
671bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
672bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
673bd59e6c6SAmlan Barua #else
6743c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6753c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6769496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
6779496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
6789496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
6799496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
6803c3a9c75SAmlan Barua 
6813c3a9c75SAmlan Barua       if (dim[1]%2==0)
6823c3a9c75SAmlan Barua         NM = dim[1]+2;
6833c3a9c75SAmlan Barua       else
6843c3a9c75SAmlan Barua         NM = dim[1]+1;
6853c3a9c75SAmlan Barua 
6863c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
6873c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
6883c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
6893c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
6905b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
6913c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
6923c3a9c75SAmlan Barua         }
6933c3a9c75SAmlan Barua      }
6943c3a9c75SAmlan Barua 
6953c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
6963c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
6973c3a9c75SAmlan Barua 
698f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
699f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
700f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
701f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
702b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
703b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
704b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
705b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
706bd59e6c6SAmlan Barua #endif
7079496c9aeSAmlan Barua  break;
7083c3a9c75SAmlan Barua 
7093c3a9c75SAmlan Barua  case 3:
710bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
711bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
712bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
713bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
714bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
715bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
717bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
718bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
719bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
720bd59e6c6SAmlan Barua #else
72151d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
72251d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
72351d1eed7SAmlan Barua 
7249496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7259496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
72651d1eed7SAmlan Barua 
72751d1eed7SAmlan Barua       if (dim[2]%2==0)
72851d1eed7SAmlan Barua         NM = dim[2]+2;
72951d1eed7SAmlan Barua       else
73051d1eed7SAmlan Barua         NM = dim[2]+1;
73151d1eed7SAmlan Barua 
73251d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
73351d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
73451d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
73551d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
73651d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
73751d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
73851d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
73951d1eed7SAmlan Barua             }
74051d1eed7SAmlan Barua          }
74151d1eed7SAmlan Barua       }
74251d1eed7SAmlan Barua 
74351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
74451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
74551d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
74651d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74751d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
74851d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
749b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
750b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
751b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
752b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
753bd59e6c6SAmlan Barua #endif
7549496c9aeSAmlan Barua  break;
7553c3a9c75SAmlan Barua 
7563c3a9c75SAmlan Barua  default:
757bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
758bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
759bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
760bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
761bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
762bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
765bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
766bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
767bd59e6c6SAmlan Barua #else
768e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
769e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
770e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
771e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
772e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
773e5d7f247SAmlan Barua 
774e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
775e5d7f247SAmlan Barua 
776e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
777e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
778e5d7f247SAmlan Barua 
779e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
780ba6e06d0SAmlan Barua         NM = 2;
781e5d7f247SAmlan Barua       else
782ba6e06d0SAmlan Barua         NM = 1;
783e5d7f247SAmlan Barua 
7846971673cSAmlan Barua       j = low;
7856971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
7866971673cSAmlan Barua          {
7876971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
7886971673cSAmlan Barua           indx2[i] = j;
7896971673cSAmlan Barua           if (k%dim[ndim-1]==0)
7906971673cSAmlan Barua             { j+=NM;}
7916971673cSAmlan Barua           j++;
7926971673cSAmlan Barua          }
7936971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7946971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7956971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
7966971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7976971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7986971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
799b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
800b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
801b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
802b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
803bd59e6c6SAmlan Barua #endif
8049496c9aeSAmlan Barua  break;
8053c3a9c75SAmlan Barua   }
806e81bb599SAmlan Barua  }
8071abc6020SAmlan Barua  return(0);
8083c3a9c75SAmlan Barua }
8093c3a9c75SAmlan Barua EXTERN_C_END
8103c3a9c75SAmlan Barua 
8113c3a9c75SAmlan Barua #undef __FUNCT__
81274a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
81374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8143c3a9c75SAmlan Barua {
8153c3a9c75SAmlan Barua   PetscErrorCode ierr;
8163c3a9c75SAmlan Barua   PetscFunctionBegin;
81774a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8183c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8193c3a9c75SAmlan Barua }
82054efbe56SHong Zhang 
8215b3e41ffSAmlan Barua /*
82297504071SAmlan Barua       VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use.
8235b3e41ffSAmlan Barua   Input A, x, y
8245b3e41ffSAmlan Barua   A - FFTW matrix
8255b3e41ffSAmlan Barua   x - FFTW vector
8265b3e41ffSAmlan Barua   y - PETSc vector that user can read
8275b3e41ffSAmlan Barua 
8285b3e41ffSAmlan Barua    Level: intermediate
82997504071SAmlan Barua    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
83097504071SAmlan Barua          VecScatterFFTWToPetsc removes those extra zeros.
8315b3e41ffSAmlan Barua 
8323c3a9c75SAmlan Barua */
8333c3a9c75SAmlan Barua 
8343c3a9c75SAmlan Barua EXTERN_C_BEGIN
8353c3a9c75SAmlan Barua #undef __FUNCT__
83674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
83774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8385b3e41ffSAmlan Barua {
8395b3e41ffSAmlan Barua   PetscErrorCode ierr;
8405b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8415b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8425b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8439496c9aeSAmlan Barua   PetscInt       N=fft->N;
844b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
8459496c9aeSAmlan Barua   PetscInt       low;
8469496c9aeSAmlan Barua   PetscInt       rank,size;
8475b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8489496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8495b3e41ffSAmlan Barua   VecScatter     vecscat;
8505b3e41ffSAmlan Barua   IS             list1,list2;
8519496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8529496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8539496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8549496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8559496c9aeSAmlan Barua   ptrdiff_t      temp;
8569496c9aeSAmlan Barua #endif
8579496c9aeSAmlan Barua 
8585b3e41ffSAmlan Barua 
8595b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8605b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
861b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
8625b3e41ffSAmlan Barua 
863e81bb599SAmlan Barua   if (size==1){
864b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
8656971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8666971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8676971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8686971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
869b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
870e81bb599SAmlan Barua   }
871e81bb599SAmlan Barua   else{
872e81bb599SAmlan Barua 
873e81bb599SAmlan Barua   switch (ndim){
874e81bb599SAmlan Barua   case 1:
87564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87664657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
8779496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
8789496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
87964657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
88064657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88164657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88264657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
88364657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
88464657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
88564657d84SAmlan Barua #else
8866a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
88764657d84SAmlan Barua #endif
8885b3e41ffSAmlan Barua   break;
8895b3e41ffSAmlan Barua   case 2:
890bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
891bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
892bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
893bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
894bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
895bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
898bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
899bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
900bd59e6c6SAmlan Barua #else
9015b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9025b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9035b3e41ffSAmlan Barua 
9049496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9059496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9065b3e41ffSAmlan Barua 
9075b3e41ffSAmlan Barua       if (dim[1]%2==0)
9085b3e41ffSAmlan Barua         NM = dim[1]+2;
9095b3e41ffSAmlan Barua       else
9105b3e41ffSAmlan Barua         NM = dim[1]+1;
9115b3e41ffSAmlan Barua 
9125b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
9135b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
9145b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
9155b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
9165b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9175b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
9185b3e41ffSAmlan Barua          }
9195b3e41ffSAmlan Barua        }
9205b3e41ffSAmlan Barua 
9215b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9225b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9235b3e41ffSAmlan Barua 
9245b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9255b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9265b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9275b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
928b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
929b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
930b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
931b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
932bd59e6c6SAmlan Barua #endif
9339496c9aeSAmlan Barua   break;
9345b3e41ffSAmlan Barua   case 3:
935bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
936bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
937bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
938bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
939bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
940bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
943bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
944bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
945bd59e6c6SAmlan Barua #else
946bd59e6c6SAmlan Barua 
94751d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
94851d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
94951d1eed7SAmlan Barua 
9509496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9519496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9529496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9539496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
95451d1eed7SAmlan Barua 
95551d1eed7SAmlan Barua        if (dim[2]%2==0)
95651d1eed7SAmlan Barua         NM = dim[2]+2;
95751d1eed7SAmlan Barua        else
95851d1eed7SAmlan Barua         NM = dim[2]+1;
95951d1eed7SAmlan Barua 
96051d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
96151d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
96251d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
96351d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
96451d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
96551d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
96651d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
96751d1eed7SAmlan Barua              }
96851d1eed7SAmlan Barua           }
96951d1eed7SAmlan Barua        }
97051d1eed7SAmlan Barua 
97151d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
97251d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
97351d1eed7SAmlan Barua 
97451d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
97551d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97651d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97751d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
978b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
979b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
980b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
981b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
982bd59e6c6SAmlan Barua #endif
9839496c9aeSAmlan Barua   break;
9845b3e41ffSAmlan Barua   default:
985bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
986bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
987bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
988bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
989bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
990bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
994bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua #else
996ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
997ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
998ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
999ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1000ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1001ba6e06d0SAmlan Barua 
1002ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1003ba6e06d0SAmlan Barua 
1004ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1005ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1006ba6e06d0SAmlan Barua 
1007ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1008ba6e06d0SAmlan Barua          NM = 2;
1009ba6e06d0SAmlan Barua        else
1010ba6e06d0SAmlan Barua          NM = 1;
1011ba6e06d0SAmlan Barua 
1012ba6e06d0SAmlan Barua        j = low;
1013ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1014ba6e06d0SAmlan Barua           {
1015ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1016ba6e06d0SAmlan Barua            indx2[i] = j;
1017ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1018ba6e06d0SAmlan Barua              { j+=NM;}
1019ba6e06d0SAmlan Barua            j++;
1020ba6e06d0SAmlan Barua           }
1021ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1022ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1023ba6e06d0SAmlan Barua 
1024ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1025ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1031b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1032bd59e6c6SAmlan Barua #endif
10339496c9aeSAmlan Barua   break;
10345b3e41ffSAmlan Barua   }
1035e81bb599SAmlan Barua   }
10365b3e41ffSAmlan Barua   return 0;
10375b3e41ffSAmlan Barua }
10385b3e41ffSAmlan Barua EXTERN_C_END
10395b3e41ffSAmlan Barua 
10405b3e41ffSAmlan Barua EXTERN_C_BEGIN
10415b3e41ffSAmlan Barua #undef __FUNCT__
10423c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10433c3a9c75SAmlan Barua /*
10443c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
10453c3a9c75SAmlan Barua   via the external package FFTW
10463c3a9c75SAmlan Barua   Options Database Keys:
10473c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10483c3a9c75SAmlan Barua 
10493c3a9c75SAmlan Barua    Level: intermediate
10503c3a9c75SAmlan Barua 
10513c3a9c75SAmlan Barua */
10523c3a9c75SAmlan Barua 
1053b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1054b2b77a04SHong Zhang {
1055b2b77a04SHong Zhang   PetscErrorCode ierr;
1056b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1057b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1058b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1059b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1060b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1061b2b77a04SHong Zhang   PetscBool      flg;
10624f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
106311902ff2SHong Zhang   PetscMPIInt    size,rank;
10649496c9aeSAmlan Barua   ptrdiff_t      *pdim;
10659496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10669496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10679496c9aeSAmlan Barua    ptrdiff_t     temp;
10688ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
10694f48bc67SAmlan Barua #else
10704f48bc67SAmlan Barua    PetscInt n1;
10719496c9aeSAmlan Barua #endif
10729496c9aeSAmlan Barua 
1073b2b77a04SHong Zhang   PetscFunctionBegin;
1074b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
107511902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1076c92418ccSAmlan Barua 
107711902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
107811902ff2SHong Zhang   pdim[0] = dim[0];
10798ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10808ca4c034SAmlan Barua   tot_dim = 2*dim[0];
10818ca4c034SAmlan Barua #endif
108211902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
108311902ff2SHong Zhang      {
10846882372aSHong Zhang           partial_dim *= dim[ctr];
108511902ff2SHong Zhang           pdim[ctr] = dim[ctr];
10868ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10878ca4c034SAmlan Barua           if (ctr==ndim-1)
10888ca4c034SAmlan Barua             tot_dim *= (dim[ctr]/2+1);
10898ca4c034SAmlan Barua           else
10908ca4c034SAmlan Barua             tot_dim *= dim[ctr];
10918ca4c034SAmlan Barua #endif
10926882372aSHong Zhang      }
10933c3a9c75SAmlan Barua 
1094b2b77a04SHong Zhang   if (size == 1) {
1095e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1096b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1097b2b77a04SHong Zhang     n = N;
1098e81bb599SAmlan Barua #else
1099e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1100e81bb599SAmlan Barua     n = tot_dim;
1101e81bb599SAmlan Barua #endif
1102e81bb599SAmlan Barua 
1103b2b77a04SHong Zhang   } else {
11041abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1105b2b77a04SHong Zhang     switch (ndim){
1106b2b77a04SHong Zhang     case 1:
11073c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11083c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1109e5d7f247SAmlan Barua #else
11109496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
11116882372aSHong Zhang       n = (PetscInt)local_n0;
11129496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
11139496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1114e5d7f247SAmlan Barua #endif
1115b2b77a04SHong Zhang       break;
1116b2b77a04SHong Zhang     case 2:
11175b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1118b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1119b2b77a04SHong Zhang       /*
1120b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1121b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1122b2b77a04SHong Zhang        */
1123b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1124b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11255b3e41ffSAmlan Barua #else
11265b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
11275b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1128c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11295b3e41ffSAmlan Barua #endif
1130b2b77a04SHong Zhang       break;
1131b2b77a04SHong Zhang     case 3:
113251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
113351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
11346882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
11356882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
113651d1eed7SAmlan Barua #else
113751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
113851d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1139c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
114051d1eed7SAmlan Barua #endif
1141b2b77a04SHong Zhang       break;
1142b2b77a04SHong Zhang     default:
1143b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114411902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
11456882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
11466882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1147b3a17365SAmlan Barua #else
1148b3a17365SAmlan Barua       temp = pdim[ndim-1];
1149b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1150b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1151b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1152b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1153b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1154c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1155b3a17365SAmlan Barua #endif
1156b2b77a04SHong Zhang       break;
1157b2b77a04SHong Zhang     }
1158b2b77a04SHong Zhang   }
1159b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1160b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1161b2b77a04SHong Zhang   fft->data = (void*)fftw;
1162b2b77a04SHong Zhang 
1163b2b77a04SHong Zhang   fft->n           = n;
1164c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1165e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1166c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1167b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1168c92418ccSAmlan Barua 
1169b2b77a04SHong Zhang   fftw->p_forward  = 0;
1170b2b77a04SHong Zhang   fftw->p_backward = 0;
1171b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1172b2b77a04SHong Zhang 
1173b2b77a04SHong Zhang   if (size == 1){
1174b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1175b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1176b2b77a04SHong Zhang   } else {
1177b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1178b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1179b2b77a04SHong Zhang   }
1180b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
1181b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
11824be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
118374a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
118474a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1185b2b77a04SHong Zhang 
1186b2b77a04SHong Zhang   /* get runtime options */
1187b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1188b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1189b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1190b2b77a04SHong Zhang   PetscOptionsEnd();
1191b2b77a04SHong Zhang   PetscFunctionReturn(0);
1192b2b77a04SHong Zhang }
1193b2b77a04SHong Zhang EXTERN_C_END
11943c3a9c75SAmlan Barua 
11953c3a9c75SAmlan Barua 
11963c3a9c75SAmlan Barua 
11973c3a9c75SAmlan Barua 
1198