xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision e0ec536eaa25b727af044b8e5330dbb2620074eb)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt    partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang 
2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
2997504071SAmlan Barua 
3097504071SAmlan Barua    Input parameter:
3197504071SAmlan Barua    mat - the matrix
3297504071SAmlan Barua    x   - the vector on which FDFT will be performed
3397504071SAmlan Barua 
3497504071SAmlan Barua    Output parameter:
3597504071SAmlan Barua    y   - vector that stores result of FDFT
3697504071SAmlan Barua 
3797504071SAmlan Barua */
38b2b77a04SHong Zhang #undef __FUNCT__
39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
41b2b77a04SHong Zhang {
42b2b77a04SHong Zhang   PetscErrorCode ierr;
43b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
44b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
45b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
46b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
47b2b77a04SHong Zhang 
48b2b77a04SHong Zhang   PetscFunctionBegin;
49b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
52b2b77a04SHong Zhang     switch (ndim){
53b2b77a04SHong Zhang     case 1:
5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5658a912c5SAmlan Barua #else
5758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5858a912c5SAmlan Barua #endif
59b2b77a04SHong Zhang       break;
60b2b77a04SHong Zhang     case 2:
6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
62b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6358a912c5SAmlan Barua #else
6458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6558a912c5SAmlan Barua #endif
66b2b77a04SHong Zhang       break;
67b2b77a04SHong Zhang     case 3:
6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
69b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7058a912c5SAmlan Barua #else
7158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7258a912c5SAmlan Barua #endif
73b2b77a04SHong Zhang       break;
74b2b77a04SHong Zhang     default:
7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
76b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7758a912c5SAmlan Barua #else
7858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7958a912c5SAmlan Barua #endif
80b2b77a04SHong Zhang       break;
81b2b77a04SHong Zhang     }
82b2b77a04SHong Zhang     fftw->finarray  = x_array;
83b2b77a04SHong Zhang     fftw->foutarray = y_array;
84b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
85b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
86b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
87b2b77a04SHong Zhang   } else { /* use existing plan */
88b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
89b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
90b2b77a04SHong Zhang     } else {
91b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
92b2b77a04SHong Zhang     }
93b2b77a04SHong Zhang   }
94b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
95b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
96b2b77a04SHong Zhang   PetscFunctionReturn(0);
97b2b77a04SHong Zhang }
98b2b77a04SHong Zhang 
9997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
10097504071SAmlan Barua 
10197504071SAmlan Barua    Input parameter:
10297504071SAmlan Barua    mat - the matrix
10397504071SAmlan Barua    x   - the vector on which BDFT will be performed
10497504071SAmlan Barua 
10597504071SAmlan Barua    Output parameter:
10697504071SAmlan Barua    y   - vector that stores result of BDFT
10797504071SAmlan Barua 
10897504071SAmlan Barua */
10997504071SAmlan Barua 
110b2b77a04SHong Zhang #undef __FUNCT__
111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
113b2b77a04SHong Zhang {
114b2b77a04SHong Zhang   PetscErrorCode ierr;
115b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
116b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
117b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
118b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
119b2b77a04SHong Zhang 
120b2b77a04SHong Zhang   PetscFunctionBegin;
121b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
122b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
123b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
124b2b77a04SHong Zhang     switch (ndim){
125b2b77a04SHong Zhang     case 1:
12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
127b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12858a912c5SAmlan Barua #else
129e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13058a912c5SAmlan Barua #endif
131b2b77a04SHong Zhang       break;
132b2b77a04SHong Zhang     case 2:
13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
134b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13558a912c5SAmlan Barua #else
136e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13758a912c5SAmlan Barua #endif
138b2b77a04SHong Zhang       break;
139b2b77a04SHong Zhang     case 3:
14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
141b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14258a912c5SAmlan Barua #else
143e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
14458a912c5SAmlan Barua #endif
145b2b77a04SHong Zhang       break;
146b2b77a04SHong Zhang     default:
14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
14958a912c5SAmlan Barua #else
150e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
15158a912c5SAmlan Barua #endif
152b2b77a04SHong Zhang       break;
153b2b77a04SHong Zhang     }
154b2b77a04SHong Zhang     fftw->binarray  = x_array;
155b2b77a04SHong Zhang     fftw->boutarray = y_array;
156b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
157b2b77a04SHong Zhang   } else { /* use existing plan */
158b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
159b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
160b2b77a04SHong Zhang     } else {
161b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
162b2b77a04SHong Zhang     }
163b2b77a04SHong Zhang   }
164b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
165b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
166b2b77a04SHong Zhang   PetscFunctionReturn(0);
167b2b77a04SHong Zhang }
168b2b77a04SHong Zhang 
16997504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
17097504071SAmlan Barua 
17197504071SAmlan Barua    Input parameter:
17297504071SAmlan Barua    mat - the matrix
17397504071SAmlan Barua    x   - the vector on which FDFT will be performed
17497504071SAmlan Barua 
17597504071SAmlan Barua    Output parameter:
17697504071SAmlan Barua    y   - vector that stores result of FDFT
17797504071SAmlan Barua 
17897504071SAmlan Barua */
17997504071SAmlan Barua 
180b2b77a04SHong Zhang #undef __FUNCT__
181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
183b2b77a04SHong Zhang {
184b2b77a04SHong Zhang   PetscErrorCode ierr;
185b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
186b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
187b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
188c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
189b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
190b2b77a04SHong Zhang 
191b2b77a04SHong Zhang   PetscFunctionBegin;
192b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
193b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
194b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
195b2b77a04SHong Zhang     switch (ndim){
196b2b77a04SHong Zhang     case 1:
1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
198b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
199ae0a50aaSHong Zhang #else
200ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2013c3a9c75SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     case 2:
20497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
205b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2063c3a9c75SAmlan Barua #else
2073c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2083c3a9c75SAmlan Barua #endif
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     case 3:
2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
212b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2133c3a9c75SAmlan Barua #else
21451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2153c3a9c75SAmlan Barua #endif
216b2b77a04SHong Zhang       break;
217b2b77a04SHong Zhang     default:
2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
219c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2203c3a9c75SAmlan Barua #else
2213c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2223c3a9c75SAmlan Barua #endif
223b2b77a04SHong Zhang       break;
224b2b77a04SHong Zhang     }
225b2b77a04SHong Zhang     fftw->finarray  = x_array;
226b2b77a04SHong Zhang     fftw->foutarray = y_array;
227b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
228b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
229b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
230b2b77a04SHong Zhang   } else { /* use existing plan */
231b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
232b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
233b2b77a04SHong Zhang     } else {
234b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
235b2b77a04SHong Zhang     }
236b2b77a04SHong Zhang   }
237b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
238b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   PetscFunctionReturn(0);
240b2b77a04SHong Zhang }
241b2b77a04SHong Zhang 
24297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
24397504071SAmlan Barua 
24497504071SAmlan Barua    Input parameter:
24597504071SAmlan Barua    mat - the matrix
24697504071SAmlan Barua    x   - the vector on which BDFT will be performed
24797504071SAmlan Barua 
24897504071SAmlan Barua    Output parameter:
24997504071SAmlan Barua    y   - vector that stores result of BDFT
25097504071SAmlan Barua 
25197504071SAmlan Barua */
252b2b77a04SHong Zhang #undef __FUNCT__
253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
255b2b77a04SHong Zhang {
256b2b77a04SHong Zhang   PetscErrorCode ierr;
257b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
258b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
259b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
260c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
261b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
262b2b77a04SHong Zhang 
263b2b77a04SHong Zhang   PetscFunctionBegin;
264b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
265b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
266b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
267b2b77a04SHong Zhang     switch (ndim){
268b2b77a04SHong Zhang     case 1:
2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
270b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
271ae0a50aaSHong Zhang #else
272ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     case 2:
27697504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
277b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2783c3a9c75SAmlan Barua #else
2793c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2803c3a9c75SAmlan Barua #endif
281b2b77a04SHong Zhang       break;
282b2b77a04SHong Zhang     case 3:
2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
284b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2853c3a9c75SAmlan Barua #else
2863c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2873c3a9c75SAmlan Barua #endif
288b2b77a04SHong Zhang       break;
289b2b77a04SHong Zhang     default:
2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
291c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2923c3a9c75SAmlan Barua #else
2933c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2943c3a9c75SAmlan Barua #endif
295b2b77a04SHong Zhang       break;
296b2b77a04SHong Zhang     }
297b2b77a04SHong Zhang     fftw->binarray  = x_array;
298b2b77a04SHong Zhang     fftw->boutarray = y_array;
299b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
300b2b77a04SHong Zhang   } else { /* use existing plan */
301b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
302b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
303b2b77a04SHong Zhang     } else {
304b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305b2b77a04SHong Zhang     }
306b2b77a04SHong Zhang   }
307b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   PetscFunctionReturn(0);
310b2b77a04SHong Zhang }
311b2b77a04SHong Zhang 
312b2b77a04SHong Zhang #undef __FUNCT__
313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
315b2b77a04SHong Zhang {
316b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
317b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318b2b77a04SHong Zhang   PetscErrorCode ierr;
319b2b77a04SHong Zhang 
320b2b77a04SHong Zhang   PetscFunctionBegin;
321b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
322b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
323b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
324bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
325b2b77a04SHong Zhang   PetscFunctionReturn(0);
326b2b77a04SHong Zhang }
327b2b77a04SHong Zhang 
328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
329b2b77a04SHong Zhang #undef __FUNCT__
330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
332b2b77a04SHong Zhang {
333b2b77a04SHong Zhang   PetscErrorCode  ierr;
334b2b77a04SHong Zhang   PetscScalar     *array;
335b2b77a04SHong Zhang 
336b2b77a04SHong Zhang   PetscFunctionBegin;
337b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
338b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
339b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
340b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
341b2b77a04SHong Zhang   PetscFunctionReturn(0);
342b2b77a04SHong Zhang }
343b2b77a04SHong Zhang 
3443c3a9c75SAmlan Barua 
3454f7415efSAmlan Barua #undef __FUNCT__
3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3474be45526SHong Zhang /*@
348b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3494f7415efSAmlan Barua      parallel layout determined by FFTW
3504f7415efSAmlan Barua 
3514f7415efSAmlan Barua    Collective on Mat
3524f7415efSAmlan Barua 
3534f7415efSAmlan Barua    Input Parameter:
3544f7415efSAmlan Barua .  mat - the matrix
3554f7415efSAmlan Barua 
3564f7415efSAmlan Barua    Output Parameter:
3574f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3584f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
35997504071SAmlan Barua -   bout - (optional) output vector of backward FFTW
3604f7415efSAmlan Barua 
3614f7415efSAmlan Barua   Level: advanced
36297504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
36397504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
36497504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
36597504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
36697504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
36797504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
368*e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
369*e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
370*e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
371*e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
372*e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
373*e0ec536eSAmlan Barua         each processor and returns that.
3744f7415efSAmlan Barua 
3754f7415efSAmlan Barua .seealso: MatCreateFFTW()
3764be45526SHong Zhang @*/
3774be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3784be45526SHong Zhang {
3794be45526SHong Zhang   PetscErrorCode ierr;
380b4c3921fSHong Zhang 
3814be45526SHong Zhang   PetscFunctionBegin;
3824be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3834be45526SHong Zhang   PetscFunctionReturn(0);
3844be45526SHong Zhang }
3854be45526SHong Zhang 
3864f7415efSAmlan Barua EXTERN_C_BEGIN
3874be45526SHong Zhang #undef __FUNCT__
3884be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3894be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3904f7415efSAmlan Barua {
3914f7415efSAmlan Barua   PetscErrorCode ierr;
3924f7415efSAmlan Barua   PetscMPIInt    size,rank;
3934f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3944f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3954f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3969496c9aeSAmlan Barua   PetscInt       N=fft->N;
3974f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3984f7415efSAmlan Barua 
3994f7415efSAmlan Barua   PetscFunctionBegin;
4004f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4014f7415efSAmlan Barua   PetscValidType(A,1);
4024f7415efSAmlan Barua 
4034f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
4044f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4054f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4064f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4074f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4084f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4094f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4104f7415efSAmlan Barua #else
4118ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4128ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4138ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4144f7415efSAmlan Barua #endif
41597504071SAmlan Barua   } else { /* parallel cases */
4164f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4179496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4189496c9aeSAmlan Barua     fftw_complex   *data_fout;
4199496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4209496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4219496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4229496c9aeSAmlan Barua #else
4234f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4249496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4259496c9aeSAmlan Barua     ptrdiff_t      temp;
4269496c9aeSAmlan Barua #endif
4279496c9aeSAmlan Barua 
4284f7415efSAmlan Barua     switch (ndim){
4294f7415efSAmlan Barua           case 1:
43057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
43157625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
43257625b84SAmlan Barua #else
43357625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
43457625b84SAmlan Barua                  if (fin) {
43557625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
43657625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
43757625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
43857625b84SAmlan Barua                          }
43957625b84SAmlan Barua                  if (fout) {
44057625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
44157625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
44257625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
44357625b84SAmlan Barua                          }
44457625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44557625b84SAmlan Barua                  if (bout) {
44657625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
44757625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
44857625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
44957625b84SAmlan Barua                          }
45057625b84SAmlan Barua           break;
45157625b84SAmlan Barua #endif
4524f7415efSAmlan Barua           case 2:
45397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4544f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4554f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4564f7415efSAmlan Barua                  if (fin) {
4574f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4584f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4594f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4604f7415efSAmlan Barua                           }
4614f7415efSAmlan Barua                  if (bout) {
4624f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4634f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4644f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4654f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4664f7415efSAmlan Barua                           }
467c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
46857625b84SAmlan Barua                  if (fout) {
46957625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4709496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
47157625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
47257625b84SAmlan Barua                            }
4734f7415efSAmlan Barua #else
4744f7415efSAmlan Barua       /* Get local size */
4754f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4764f7415efSAmlan Barua                  if (fin) {
4774f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4784f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4794f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua                           }
4814f7415efSAmlan Barua                  if (fout) {
4824f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4834f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4844f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4854f7415efSAmlan Barua                            }
4864f7415efSAmlan Barua                  if (bout) {
4874f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4884f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4894f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4904f7415efSAmlan Barua                           }
4914f7415efSAmlan Barua #endif
4924f7415efSAmlan Barua           break;
4934f7415efSAmlan Barua           case 3:
4944f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4954f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4964f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4974f7415efSAmlan Barua                  if (fin) {
4984f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4994f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5004f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5014f7415efSAmlan Barua                          }
5024f7415efSAmlan Barua                  if (bout) {
5034f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5044f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5054f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5064f7415efSAmlan Barua                           }
507c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
50857625b84SAmlan Barua                  if (fout) {
50957625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
51057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51157625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
51257625b84SAmlan Barua                           }
5134f7415efSAmlan Barua #else
5140c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5150c9b18e4SAmlan Barua                  if (fin) {
5160c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5170c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5180c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5190c9b18e4SAmlan Barua                          }
5200c9b18e4SAmlan Barua                  if (fout) {
5210c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5220c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5230c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5240c9b18e4SAmlan Barua                          }
5250c9b18e4SAmlan Barua                  if (bout) {
5260c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5270c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5280c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5290c9b18e4SAmlan Barua                          }
5304f7415efSAmlan Barua #endif
5314f7415efSAmlan Barua           break;
5324f7415efSAmlan Barua           default:
5334f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5344f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5354f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5364f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5374f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5384f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5394f7415efSAmlan Barua 
5404f7415efSAmlan Barua                  if (fin) {
5414f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5424f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5434f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5444f7415efSAmlan Barua                         }
5454f7415efSAmlan Barua                  if (bout) {
5464f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5474f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5489496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5494f7415efSAmlan Barua                         }
550c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
551c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
552c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
55357625b84SAmlan Barua                  if (fout) {
55457625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
55557625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55657625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55757625b84SAmlan Barua                         }
5584f7415efSAmlan Barua #else
5590c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5600c9b18e4SAmlan Barua                 if (fin) {
5610c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5620c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5630c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5640c9b18e4SAmlan Barua                        }
5650c9b18e4SAmlan Barua                 if (fout) {
5660c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5670c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5680c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5690c9b18e4SAmlan Barua                        }
5700c9b18e4SAmlan Barua                 if (bout) {
5710c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5720c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5730c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5740c9b18e4SAmlan Barua                        }
5754f7415efSAmlan Barua #endif
5764f7415efSAmlan Barua             break;
5774f7415efSAmlan Barua           }
5784f7415efSAmlan Barua   }
5794f7415efSAmlan Barua   PetscFunctionReturn(0);
5804f7415efSAmlan Barua }
5814f7415efSAmlan Barua EXTERN_C_END
5824f7415efSAmlan Barua 
583c92418ccSAmlan Barua #undef __FUNCT__
58474a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW"
58574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
5863c3a9c75SAmlan Barua {
5873c3a9c75SAmlan Barua   PetscErrorCode ierr;
5883c3a9c75SAmlan Barua   PetscFunctionBegin;
58974a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
5903c3a9c75SAmlan Barua   PetscFunctionReturn(0);
5913c3a9c75SAmlan Barua }
59254efbe56SHong Zhang 
593b2b77a04SHong Zhang /*
59497504071SAmlan Barua       VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block.
5953c3a9c75SAmlan Barua   Input A, x, y
5963c3a9c75SAmlan Barua   A - FFTW matrix
59754dd5118SAmlan Barua   x - user data
598b2b77a04SHong Zhang   Options Database Keys:
599b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
600b2b77a04SHong Zhang 
601b2b77a04SHong Zhang    Level: intermediate
60297504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
60397504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
60497504071SAmlan Barua          zeros. This routine does that job by scattering operation.
60597504071SAmlan Barua 
606b2b77a04SHong Zhang 
607b2b77a04SHong Zhang */
6083c3a9c75SAmlan Barua 
6093c3a9c75SAmlan Barua EXTERN_C_BEGIN
6103c3a9c75SAmlan Barua #undef __FUNCT__
61174a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
61274a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6133c3a9c75SAmlan Barua {
6143c3a9c75SAmlan Barua   PetscErrorCode ierr;
6153c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6163c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6173c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6189496c9aeSAmlan Barua   PetscInt       N=fft->N;
619b5d11533SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
6209496c9aeSAmlan Barua   PetscInt       low;
6218ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
6223c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
6239496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6243c3a9c75SAmlan Barua   VecScatter     vecscat;
6253c3a9c75SAmlan Barua   IS             list1,list2;
6269496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6279496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6289496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
6299496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
6309496c9aeSAmlan Barua   ptrdiff_t      temp;
6319496c9aeSAmlan Barua #endif
6323c3a9c75SAmlan Barua 
633f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
634f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6353c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
6363c3a9c75SAmlan Barua 
637e81bb599SAmlan Barua   if (size==1)
638e81bb599SAmlan Barua     {
6398ca4c034SAmlan Barua           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
6408ca4c034SAmlan Barua           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
6419496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
6426971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
6436971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6446971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
6456971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
646b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
647e81bb599SAmlan Barua     }
648e81bb599SAmlan Barua 
649e81bb599SAmlan Barua  else{
650e81bb599SAmlan Barua 
6513c3a9c75SAmlan Barua  switch (ndim){
6523c3a9c75SAmlan Barua  case 1:
65364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
65464657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
65564657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
65664657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
65764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
65864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
66064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
66164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
66264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
66364657d84SAmlan Barua #else
664e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
66564657d84SAmlan Barua #endif
6663c3a9c75SAmlan Barua  break;
6673c3a9c75SAmlan Barua  case 2:
668bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
669bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
670bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
671bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
672bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
673bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
674bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
676bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
677bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
678bd59e6c6SAmlan Barua #else
6793c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6803c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6819496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
6829496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
6839496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
6849496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
6853c3a9c75SAmlan Barua 
6863c3a9c75SAmlan Barua       if (dim[1]%2==0)
6873c3a9c75SAmlan Barua         NM = dim[1]+2;
6883c3a9c75SAmlan Barua       else
6893c3a9c75SAmlan Barua         NM = dim[1]+1;
6903c3a9c75SAmlan Barua 
6913c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
6923c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
6933c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
6943c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
6955b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
6963c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
6973c3a9c75SAmlan Barua         }
6983c3a9c75SAmlan Barua      }
6993c3a9c75SAmlan Barua 
7003c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7013c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7023c3a9c75SAmlan Barua 
703f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
704f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
705f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
706f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
707b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
708b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
709b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
710b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
711bd59e6c6SAmlan Barua #endif
7129496c9aeSAmlan Barua  break;
7133c3a9c75SAmlan Barua 
7143c3a9c75SAmlan Barua  case 3:
715bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
716bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
717bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
718bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
719bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
720bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
723bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
724bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
725bd59e6c6SAmlan Barua #else
72651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
72751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
72851d1eed7SAmlan Barua 
7299496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
7309496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
73151d1eed7SAmlan Barua 
73251d1eed7SAmlan Barua       if (dim[2]%2==0)
73351d1eed7SAmlan Barua         NM = dim[2]+2;
73451d1eed7SAmlan Barua       else
73551d1eed7SAmlan Barua         NM = dim[2]+1;
73651d1eed7SAmlan Barua 
73751d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
73851d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
73951d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
74051d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
74151d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
74251d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
74351d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
74451d1eed7SAmlan Barua             }
74551d1eed7SAmlan Barua          }
74651d1eed7SAmlan Barua       }
74751d1eed7SAmlan Barua 
74851d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
74951d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
75051d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
75151d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
75251d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
75351d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
754b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
755b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
756b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
757b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
758bd59e6c6SAmlan Barua #endif
7599496c9aeSAmlan Barua  break;
7603c3a9c75SAmlan Barua 
7613c3a9c75SAmlan Barua  default:
762bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
763bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
764bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
765bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
766bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
767bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
768bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
769bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
770bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
771bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
772bd59e6c6SAmlan Barua #else
773e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
774e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
775e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
776e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
777e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
778e5d7f247SAmlan Barua 
779e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
780e5d7f247SAmlan Barua 
781e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
782e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
783e5d7f247SAmlan Barua 
784e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
785ba6e06d0SAmlan Barua         NM = 2;
786e5d7f247SAmlan Barua       else
787ba6e06d0SAmlan Barua         NM = 1;
788e5d7f247SAmlan Barua 
7896971673cSAmlan Barua       j = low;
7906971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
7916971673cSAmlan Barua          {
7926971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
7936971673cSAmlan Barua           indx2[i] = j;
7946971673cSAmlan Barua           if (k%dim[ndim-1]==0)
7956971673cSAmlan Barua             { j+=NM;}
7966971673cSAmlan Barua           j++;
7976971673cSAmlan Barua          }
7986971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7996971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8006971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8016971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8026971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8036971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
804b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
805b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
806b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
807b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
808bd59e6c6SAmlan Barua #endif
8099496c9aeSAmlan Barua  break;
8103c3a9c75SAmlan Barua   }
811e81bb599SAmlan Barua  }
8121abc6020SAmlan Barua  return(0);
8133c3a9c75SAmlan Barua }
8143c3a9c75SAmlan Barua EXTERN_C_END
8153c3a9c75SAmlan Barua 
8163c3a9c75SAmlan Barua #undef __FUNCT__
81774a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc"
81874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8193c3a9c75SAmlan Barua {
8203c3a9c75SAmlan Barua   PetscErrorCode ierr;
8213c3a9c75SAmlan Barua   PetscFunctionBegin;
82274a26884SAmlan Barua   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8233c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8243c3a9c75SAmlan Barua }
82554efbe56SHong Zhang 
8265b3e41ffSAmlan Barua /*
82797504071SAmlan Barua       VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use.
8285b3e41ffSAmlan Barua   Input A, x, y
8295b3e41ffSAmlan Barua   A - FFTW matrix
8305b3e41ffSAmlan Barua   x - FFTW vector
8315b3e41ffSAmlan Barua   y - PETSc vector that user can read
8325b3e41ffSAmlan Barua 
8335b3e41ffSAmlan Barua    Level: intermediate
83497504071SAmlan Barua    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
83597504071SAmlan Barua          VecScatterFFTWToPetsc removes those extra zeros.
8365b3e41ffSAmlan Barua 
8373c3a9c75SAmlan Barua */
8383c3a9c75SAmlan Barua 
8393c3a9c75SAmlan Barua EXTERN_C_BEGIN
8403c3a9c75SAmlan Barua #undef __FUNCT__
84174a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
84274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
8435b3e41ffSAmlan Barua {
8445b3e41ffSAmlan Barua   PetscErrorCode ierr;
8455b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8465b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8475b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8489496c9aeSAmlan Barua   PetscInt       N=fft->N;
849b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
8509496c9aeSAmlan Barua   PetscInt       low;
8519496c9aeSAmlan Barua   PetscInt       rank,size;
8525b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8539496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8545b3e41ffSAmlan Barua   VecScatter     vecscat;
8555b3e41ffSAmlan Barua   IS             list1,list2;
8569496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8579496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8589496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8599496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8609496c9aeSAmlan Barua   ptrdiff_t      temp;
8619496c9aeSAmlan Barua #endif
8629496c9aeSAmlan Barua 
8635b3e41ffSAmlan Barua 
8645b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
8655b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
866b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
8675b3e41ffSAmlan Barua 
868e81bb599SAmlan Barua   if (size==1){
869b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
8706971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8716971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8726971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8736971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
874b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
875e81bb599SAmlan Barua   }
876e81bb599SAmlan Barua   else{
877e81bb599SAmlan Barua 
878e81bb599SAmlan Barua   switch (ndim){
879e81bb599SAmlan Barua   case 1:
88064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
88164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
8829496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
8839496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
88464657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
88564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
88764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
88864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
88964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
89064657d84SAmlan Barua #else
8916a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
89264657d84SAmlan Barua #endif
8935b3e41ffSAmlan Barua   break;
8945b3e41ffSAmlan Barua   case 2:
895bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
896bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
897bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
898bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
899bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
900bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
903bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
904bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
905bd59e6c6SAmlan Barua #else
9065b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9075b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9085b3e41ffSAmlan Barua 
9099496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9109496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9115b3e41ffSAmlan Barua 
9125b3e41ffSAmlan Barua       if (dim[1]%2==0)
9135b3e41ffSAmlan Barua         NM = dim[1]+2;
9145b3e41ffSAmlan Barua       else
9155b3e41ffSAmlan Barua         NM = dim[1]+1;
9165b3e41ffSAmlan Barua 
9175b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
9185b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
9195b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
9205b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
9215b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9225b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
9235b3e41ffSAmlan Barua          }
9245b3e41ffSAmlan Barua        }
9255b3e41ffSAmlan Barua 
9265b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9275b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9285b3e41ffSAmlan Barua 
9295b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9305b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9315b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9325b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
933b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
934b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
935b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
936b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
937bd59e6c6SAmlan Barua #endif
9389496c9aeSAmlan Barua   break;
9395b3e41ffSAmlan Barua   case 3:
940bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
941bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
942bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
943bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
944bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
945bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
948bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
949bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
950bd59e6c6SAmlan Barua #else
951bd59e6c6SAmlan Barua 
95251d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
95351d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
95451d1eed7SAmlan Barua 
9559496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9569496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9579496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9589496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
95951d1eed7SAmlan Barua 
96051d1eed7SAmlan Barua        if (dim[2]%2==0)
96151d1eed7SAmlan Barua         NM = dim[2]+2;
96251d1eed7SAmlan Barua        else
96351d1eed7SAmlan Barua         NM = dim[2]+1;
96451d1eed7SAmlan Barua 
96551d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
96651d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
96751d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
96851d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
96951d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
97051d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
97151d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
97251d1eed7SAmlan Barua              }
97351d1eed7SAmlan Barua           }
97451d1eed7SAmlan Barua        }
97551d1eed7SAmlan Barua 
97651d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
97751d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
97851d1eed7SAmlan Barua 
97951d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
98051d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
98151d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
98251d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
983b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
984b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
985b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
986b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
987bd59e6c6SAmlan Barua #endif
9889496c9aeSAmlan Barua   break;
9895b3e41ffSAmlan Barua   default:
990bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
991bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
992bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
993bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
994bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
998bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
999bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1000bd59e6c6SAmlan Barua #else
1001ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1002ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1003ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1004ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1005ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1006ba6e06d0SAmlan Barua 
1007ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1008ba6e06d0SAmlan Barua 
1009ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1010ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1011ba6e06d0SAmlan Barua 
1012ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1013ba6e06d0SAmlan Barua          NM = 2;
1014ba6e06d0SAmlan Barua        else
1015ba6e06d0SAmlan Barua          NM = 1;
1016ba6e06d0SAmlan Barua 
1017ba6e06d0SAmlan Barua        j = low;
1018ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1019ba6e06d0SAmlan Barua           {
1020ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1021ba6e06d0SAmlan Barua            indx2[i] = j;
1022ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1023ba6e06d0SAmlan Barua              { j+=NM;}
1024ba6e06d0SAmlan Barua            j++;
1025ba6e06d0SAmlan Barua           }
1026ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1027ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1028ba6e06d0SAmlan Barua 
1029ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1030ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1033b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1034b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1035b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1036b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1037bd59e6c6SAmlan Barua #endif
10389496c9aeSAmlan Barua   break;
10395b3e41ffSAmlan Barua   }
1040e81bb599SAmlan Barua   }
10415b3e41ffSAmlan Barua   return 0;
10425b3e41ffSAmlan Barua }
10435b3e41ffSAmlan Barua EXTERN_C_END
10445b3e41ffSAmlan Barua 
10455b3e41ffSAmlan Barua EXTERN_C_BEGIN
10465b3e41ffSAmlan Barua #undef __FUNCT__
10473c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
10483c3a9c75SAmlan Barua /*
10493c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
10503c3a9c75SAmlan Barua   via the external package FFTW
10513c3a9c75SAmlan Barua   Options Database Keys:
10523c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10533c3a9c75SAmlan Barua 
10543c3a9c75SAmlan Barua    Level: intermediate
10553c3a9c75SAmlan Barua 
10563c3a9c75SAmlan Barua */
10573c3a9c75SAmlan Barua 
1058b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1059b2b77a04SHong Zhang {
1060b2b77a04SHong Zhang   PetscErrorCode ierr;
1061b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1062b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1063b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1064b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1065b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1066b2b77a04SHong Zhang   PetscBool      flg;
10674f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
106811902ff2SHong Zhang   PetscMPIInt    size,rank;
10699496c9aeSAmlan Barua   ptrdiff_t      *pdim;
10709496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10719496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10729496c9aeSAmlan Barua    ptrdiff_t     temp;
10738ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
10744f48bc67SAmlan Barua #else
10754f48bc67SAmlan Barua    PetscInt n1;
10769496c9aeSAmlan Barua #endif
10779496c9aeSAmlan Barua 
1078b2b77a04SHong Zhang   PetscFunctionBegin;
1079b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
108011902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1081c92418ccSAmlan Barua 
10821878d478SAmlan Barua   fftw_mpi_init();
108311902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
108411902ff2SHong Zhang   pdim[0] = dim[0];
10858ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10868ca4c034SAmlan Barua   tot_dim = 2*dim[0];
10878ca4c034SAmlan Barua #endif
108811902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
108911902ff2SHong Zhang      {
10906882372aSHong Zhang           partial_dim *= dim[ctr];
109111902ff2SHong Zhang           pdim[ctr] = dim[ctr];
10928ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10938ca4c034SAmlan Barua           if (ctr==ndim-1)
10948ca4c034SAmlan Barua             tot_dim *= (dim[ctr]/2+1);
10958ca4c034SAmlan Barua           else
10968ca4c034SAmlan Barua             tot_dim *= dim[ctr];
10978ca4c034SAmlan Barua #endif
10986882372aSHong Zhang      }
10993c3a9c75SAmlan Barua 
1100b2b77a04SHong Zhang   if (size == 1) {
1101e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1102b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1103b2b77a04SHong Zhang     n = N;
1104e81bb599SAmlan Barua #else
1105e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1106e81bb599SAmlan Barua     n = tot_dim;
1107e81bb599SAmlan Barua #endif
1108e81bb599SAmlan Barua 
1109b2b77a04SHong Zhang   } else {
11101abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1111b2b77a04SHong Zhang     switch (ndim){
1112b2b77a04SHong Zhang     case 1:
11133c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11143c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1115e5d7f247SAmlan Barua #else
11169496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
11176882372aSHong Zhang       n = (PetscInt)local_n0;
11189496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
11199496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1120e5d7f247SAmlan Barua #endif
1121b2b77a04SHong Zhang       break;
1122b2b77a04SHong Zhang     case 2:
11235b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1124b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1125b2b77a04SHong Zhang       /*
1126b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1127b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1128b2b77a04SHong Zhang        */
1129b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1130b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11315b3e41ffSAmlan Barua #else
11325b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
11335b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1134c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11355b3e41ffSAmlan Barua #endif
1136b2b77a04SHong Zhang       break;
1137b2b77a04SHong Zhang     case 3:
113851d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
113951d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
11406882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
11416882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
114251d1eed7SAmlan Barua #else
114351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
114451d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1145c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
114651d1eed7SAmlan Barua #endif
1147b2b77a04SHong Zhang       break;
1148b2b77a04SHong Zhang     default:
1149b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
115011902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
11516882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
11526882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1153b3a17365SAmlan Barua #else
1154b3a17365SAmlan Barua       temp = pdim[ndim-1];
1155b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1156b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1157b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1158b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1159b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1160c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1161b3a17365SAmlan Barua #endif
1162b2b77a04SHong Zhang       break;
1163b2b77a04SHong Zhang     }
1164b2b77a04SHong Zhang   }
1165b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1166b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1167b2b77a04SHong Zhang   fft->data = (void*)fftw;
1168b2b77a04SHong Zhang 
1169b2b77a04SHong Zhang   fft->n           = n;
1170c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1171e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1172c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1173b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1174c92418ccSAmlan Barua 
1175b2b77a04SHong Zhang   fftw->p_forward  = 0;
1176b2b77a04SHong Zhang   fftw->p_backward = 0;
1177b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1178b2b77a04SHong Zhang 
1179b2b77a04SHong Zhang   if (size == 1){
1180b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1181b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1182b2b77a04SHong Zhang   } else {
1183b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1184b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1185b2b77a04SHong Zhang   }
1186b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
1187b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
11884be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
118974a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
119074a26884SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1191b2b77a04SHong Zhang 
1192b2b77a04SHong Zhang   /* get runtime options */
1193b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1194b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1195b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1196b2b77a04SHong Zhang   PetscOptionsEnd();
1197b2b77a04SHong Zhang   PetscFunctionReturn(0);
1198b2b77a04SHong Zhang }
1199b2b77a04SHong Zhang EXTERN_C_END
12003c3a9c75SAmlan Barua 
12013c3a9c75SAmlan Barua 
12023c3a9c75SAmlan Barua 
12033c3a9c75SAmlan Barua 
1204