xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 1abc60207cdaa74fcbe9e62dcf6e5da233a93d51)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
3033c3a9c75SAmlan Barua 
3044f7415efSAmlan Barua #undef __FUNCT__
3054be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3064be45526SHong Zhang /*@
307b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3084f7415efSAmlan Barua      parallel layout determined by FFTW
3094f7415efSAmlan Barua 
3104f7415efSAmlan Barua    Collective on Mat
3114f7415efSAmlan Barua 
3124f7415efSAmlan Barua    Input Parameter:
3134f7415efSAmlan Barua .  mat - the matrix
3144f7415efSAmlan Barua 
3154f7415efSAmlan Barua    Output Parameter:
3164f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3174f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3184f7415efSAmlan Barua 
3194f7415efSAmlan Barua   Level: advanced
3204f7415efSAmlan Barua 
3214f7415efSAmlan Barua .seealso: MatCreateFFTW()
3224be45526SHong Zhang @*/
3234be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3244be45526SHong Zhang {
3254be45526SHong Zhang   PetscErrorCode ierr;
3264be45526SHong Zhang   PetscFunctionBegin;
3274be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3284be45526SHong Zhang   PetscFunctionReturn(0);
3294be45526SHong Zhang }
3304be45526SHong Zhang 
3314f7415efSAmlan Barua EXTERN_C_BEGIN
3324be45526SHong Zhang #undef __FUNCT__
3334be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3344be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3354f7415efSAmlan Barua {
3364f7415efSAmlan Barua   PetscErrorCode ierr;
3374f7415efSAmlan Barua   PetscMPIInt    size,rank;
3384f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3394f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3404f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3419496c9aeSAmlan Barua   PetscInt       N=fft->N;
3424f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3434f7415efSAmlan Barua 
3444f7415efSAmlan Barua   PetscFunctionBegin;
3454f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3464f7415efSAmlan Barua   PetscValidType(A,1);
3474f7415efSAmlan Barua 
3484f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
3494f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
3504f7415efSAmlan Barua   if (size == 1){ /* sequential case */
3514f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3524f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
3534f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
3544f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
3554f7415efSAmlan Barua #else
3568ca4c034SAmlan Barua //    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
3578ca4c034SAmlan Barua //    if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
3588ca4c034SAmlan Barua //    if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
3598ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
3608ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
3618ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
3624f7415efSAmlan Barua #endif
3634f7415efSAmlan Barua   } else {
3644f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
3659496c9aeSAmlan Barua     ptrdiff_t      local_n1;
3669496c9aeSAmlan Barua     fftw_complex   *data_fout;
3679496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
3689496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3699496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
3709496c9aeSAmlan Barua #else
3714f7415efSAmlan Barua     double         *data_finr,*data_boutr;
3729496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
3739496c9aeSAmlan Barua     ptrdiff_t      temp;
3749496c9aeSAmlan Barua #endif
3759496c9aeSAmlan Barua 
3764f7415efSAmlan Barua     switch (ndim){
3774f7415efSAmlan Barua           case 1:
37857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
37957625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
38057625b84SAmlan Barua #else
3819496c9aeSAmlan Barua                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
38257625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
38357625b84SAmlan Barua                  if (fin) {
38457625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
38557625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
38657625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
38757625b84SAmlan Barua                          }
38857625b84SAmlan Barua                  if (fout) {
38957625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
39057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
39157625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
39257625b84SAmlan Barua                          }
39357625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
39457625b84SAmlan Barua                  if (bout) {
39557625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
39657625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
39757625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
39857625b84SAmlan Barua                          }
39957625b84SAmlan Barua           break;
40057625b84SAmlan Barua #endif
4014f7415efSAmlan Barua           case 2:
4024f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4034f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4044f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4054f7415efSAmlan Barua                  if (fin) {
4064f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4074f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4084f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4094f7415efSAmlan Barua                           }
4104f7415efSAmlan Barua                  if (bout) {
4114f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4124f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4134f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4144f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4154f7415efSAmlan Barua                           }
416c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
41757625b84SAmlan Barua                  if (fout) {
41857625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4199496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
42057625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
42157625b84SAmlan Barua                            }
4224f7415efSAmlan Barua #else
4234f7415efSAmlan Barua       /* Get local size */
4244f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4254f7415efSAmlan Barua                  if (fin) {
4264f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4274f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4284f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4294f7415efSAmlan Barua                           }
4304f7415efSAmlan Barua                  if (fout) {
4314f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4324f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4334f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4344f7415efSAmlan Barua                            }
4354f7415efSAmlan Barua                  if (bout) {
4364f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4374f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4384f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4394f7415efSAmlan Barua                           }
4404f7415efSAmlan Barua #endif
4414f7415efSAmlan Barua           break;
4424f7415efSAmlan Barua           case 3:
4434f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4444f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4454f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4464f7415efSAmlan Barua                  if (fin) {
4474f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4484f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4494f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4504f7415efSAmlan Barua                          }
4514f7415efSAmlan Barua                  if (bout) {
4524f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4534f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4544f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4554f7415efSAmlan Barua                           }
456c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
45757625b84SAmlan Barua                  if (fout) {
45857625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45957625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46057625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
46157625b84SAmlan Barua                           }
4624f7415efSAmlan Barua #else
4630c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
4640c9b18e4SAmlan Barua                  if (fin) {
4650c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4660c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4670c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4680c9b18e4SAmlan Barua                          }
4690c9b18e4SAmlan Barua                  if (fout) {
4700c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4710c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4720c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4730c9b18e4SAmlan Barua                          }
4740c9b18e4SAmlan Barua                  if (bout) {
4750c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4760c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4770c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4780c9b18e4SAmlan Barua                          }
4794f7415efSAmlan Barua #endif
4804f7415efSAmlan Barua           break;
4814f7415efSAmlan Barua           default:
4824f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4834f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
4844f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
4854f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4864f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
4874f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
4884f7415efSAmlan Barua 
4894f7415efSAmlan Barua                  if (fin) {
4904f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4914f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4924f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4934f7415efSAmlan Barua                         }
4944f7415efSAmlan Barua                  if (bout) {
4954f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4964f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4979496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4984f7415efSAmlan Barua                         }
499c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
500c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
501c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
50257625b84SAmlan Barua                  if (fout) {
50357625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
50457625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50557625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50657625b84SAmlan Barua                         }
5074f7415efSAmlan Barua #else
5080c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5090c9b18e4SAmlan Barua                 if (fin) {
5100c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5110c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5120c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5130c9b18e4SAmlan Barua                        }
5140c9b18e4SAmlan Barua                 if (fout) {
5150c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5160c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5170c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5180c9b18e4SAmlan Barua                        }
5190c9b18e4SAmlan Barua                 if (bout) {
5200c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5210c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5220c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5230c9b18e4SAmlan Barua                        }
5244f7415efSAmlan Barua #endif
5254f7415efSAmlan Barua             break;
5264f7415efSAmlan Barua           }
5274f7415efSAmlan Barua   }
5284f7415efSAmlan Barua   PetscFunctionReturn(0);
5294f7415efSAmlan Barua }
5304f7415efSAmlan Barua EXTERN_C_END
5314f7415efSAmlan Barua 
532c92418ccSAmlan Barua #undef __FUNCT__
533b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
534b2b77a04SHong Zhang /*
535b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
536b2b77a04SHong Zhang      parallel layout determined by FFTW
537b2b77a04SHong Zhang 
538b2b77a04SHong Zhang    Collective on Mat
539b2b77a04SHong Zhang 
540b2b77a04SHong Zhang    Input Parameter:
541b2b77a04SHong Zhang .  mat - the matrix
542b2b77a04SHong Zhang 
543b2b77a04SHong Zhang    Output Parameter:
544b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
545b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
546b2b77a04SHong Zhang 
547b2b77a04SHong Zhang   Level: advanced
548b2b77a04SHong Zhang 
549b2b77a04SHong Zhang .seealso: MatCreateFFTW()
550b2b77a04SHong Zhang */
551b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
552b2b77a04SHong Zhang {
553b2b77a04SHong Zhang   PetscErrorCode ierr;
554b2b77a04SHong Zhang   PetscMPIInt    size,rank;
555b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
556b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
557c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
5589496c9aeSAmlan Barua   PetscInt       N=fft->N;
559e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
560b2b77a04SHong Zhang 
561b2b77a04SHong Zhang   PetscFunctionBegin;
562b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
563b2b77a04SHong Zhang   PetscValidType(A,1);
564b2b77a04SHong Zhang 
565b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
566b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
567b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
568e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
569b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
570b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
571e5d7f247SAmlan Barua #else
572e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
573e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
574e81bb599SAmlan Barua #endif
575b2b77a04SHong Zhang   } else {        /* mpi case */
576b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
5776882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
578b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
5799496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
58051d1eed7SAmlan Barua     double         *data_finr ;
581b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
5829496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
5839496c9aeSAmlan Barua #endif
5849496c9aeSAmlan Barua 
585c92418ccSAmlan Barua //    PetscInt ctr;
586c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
587c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
588c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
58911902ff2SHong Zhang 
590c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
591f76f76beSAmlan Barua //        {k
592c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
593c92418ccSAmlan Barua //       }
594b2b77a04SHong Zhang 
595f76f76beSAmlan Barua 
596f76f76beSAmlan Barua 
597b2b77a04SHong Zhang     switch (ndim){
598b2b77a04SHong Zhang     case 1:
5996882372aSHong Zhang       /* Get local size */
600e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
601c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6026882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6039496c9aeSAmlan Barua       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
6046882372aSHong Zhang       if (fin) {
6056882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6066882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6076882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6086882372aSHong Zhang       }
6096882372aSHong Zhang       if (fout) {
6106882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
61157625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6126882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6136882372aSHong Zhang       }
614b2b77a04SHong Zhang       break;
615b2b77a04SHong Zhang     case 2:
6163c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6173c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6183c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6193c3a9c75SAmlan Barua       if (fin) {
6203c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
62154dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6223c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
62309dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6243c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6253c3a9c75SAmlan Barua       }
62657625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
62757625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
62857625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
6293c3a9c75SAmlan Barua       if (fout) {
63009dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
63109dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6323c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6333c3a9c75SAmlan Barua       }
634f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
6353c3a9c75SAmlan Barua 
6363c3a9c75SAmlan Barua #else
637b2b77a04SHong Zhang       /* Get local size */
63864657d84SAmlan Barua      //printf("Hope this does not come here");
639b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
640b2b77a04SHong Zhang       if (fin) {
641b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
642b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
643b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
644b2b77a04SHong Zhang       }
645b2b77a04SHong Zhang       if (fout) {
646b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
647b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
648b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
649b2b77a04SHong Zhang       }
65064657d84SAmlan Barua      //printf("Hope this does not come here");
6513c3a9c75SAmlan Barua #endif
652b2b77a04SHong Zhang       break;
653b2b77a04SHong Zhang     case 3:
6546882372aSHong Zhang       /* Get local size */
6553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
65651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
65751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
65851d1eed7SAmlan Barua       if (fin) {
65951d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
66051d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
66151d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
66251d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
66351d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
66451d1eed7SAmlan Barua       }
66557625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
66651d1eed7SAmlan Barua       if (fout) {
66751d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
66851d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
66951d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
67051d1eed7SAmlan Barua       }
67151d1eed7SAmlan Barua 
67251d1eed7SAmlan Barua 
6733c3a9c75SAmlan Barua #else
6746882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
67511902ff2SHong Zhang //      printf("The quantity n is %d",n);
6766882372aSHong Zhang       if (fin) {
6776882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6786882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6796882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6806882372aSHong Zhang       }
6816882372aSHong Zhang       if (fout) {
6826882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6836882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6846882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6856882372aSHong Zhang       }
6863c3a9c75SAmlan Barua #endif
687b2b77a04SHong Zhang       break;
688b2b77a04SHong Zhang     default:
6896882372aSHong Zhang       /* Get local size */
6903c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
691b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
692b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
693b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
694b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
695b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
696b3a17365SAmlan Barua       if (fin) {
697b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
698b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
699b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
700b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
701b3a17365SAmlan Barua       }
70257625b84SAmlan Barua       temp = fftw->partial_dim;
70357625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
70457625b84SAmlan Barua 
70557625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
706b3a17365SAmlan Barua       if (fout) {
707b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
70857625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
709b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
710b3a17365SAmlan Barua       }
711b3a17365SAmlan Barua 
7123c3a9c75SAmlan Barua #else
713c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
7146882372aSHong Zhang       if (fin) {
7156882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7166882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7176882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7186882372aSHong Zhang       }
7196882372aSHong Zhang       if (fout) {
7206882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7216882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7226882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7236882372aSHong Zhang       }
7243c3a9c75SAmlan Barua #endif
725b2b77a04SHong Zhang       break;
726b2b77a04SHong Zhang     }
727b2b77a04SHong Zhang   }
72854dd5118SAmlan Barua //  if (fin){
72954dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
73054dd5118SAmlan Barua //  }
73154dd5118SAmlan Barua //  if (fout){
73254dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
73354dd5118SAmlan Barua //  }
734b2b77a04SHong Zhang   PetscFunctionReturn(0);
735b2b77a04SHong Zhang }
736b2b77a04SHong Zhang 
737b2b77a04SHong Zhang #undef __FUNCT__
7383c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
7393c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
7403c3a9c75SAmlan Barua {
7413c3a9c75SAmlan Barua   PetscErrorCode ierr;
7423c3a9c75SAmlan Barua   PetscFunctionBegin;
7433c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7443c3a9c75SAmlan Barua   PetscFunctionReturn(0);
7453c3a9c75SAmlan Barua }
74654efbe56SHong Zhang 
747b2b77a04SHong Zhang /*
7489496c9aeSAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
7499496c9aeSAmlan Barua       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
7509496c9aeSAmlan Barua       changing dimension.
7513c3a9c75SAmlan Barua   Input A, x, y
7523c3a9c75SAmlan Barua   A - FFTW matrix
75354dd5118SAmlan Barua   x - user data
754b2b77a04SHong Zhang   Options Database Keys:
755b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
756b2b77a04SHong Zhang 
757b2b77a04SHong Zhang    Level: intermediate
758b2b77a04SHong Zhang 
759b2b77a04SHong Zhang */
7603c3a9c75SAmlan Barua 
7613c3a9c75SAmlan Barua EXTERN_C_BEGIN
7623c3a9c75SAmlan Barua #undef __FUNCT__
7633c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
7643c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
7653c3a9c75SAmlan Barua {
7663c3a9c75SAmlan Barua   PetscErrorCode ierr;
7673c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
7683c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
7693c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7709496c9aeSAmlan Barua   PetscInt       N=fft->N;
771b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
7729496c9aeSAmlan Barua   PetscInt       low;
7738ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
7743c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
7759496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7763c3a9c75SAmlan Barua   VecScatter     vecscat;
7773c3a9c75SAmlan Barua   IS             list1,list2;
7789496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7799496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
7809496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
7819496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
7829496c9aeSAmlan Barua   ptrdiff_t      temp;
7839496c9aeSAmlan Barua #endif
7843c3a9c75SAmlan Barua 
785f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
786f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
7873c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
7883c3a9c75SAmlan Barua 
789e81bb599SAmlan Barua   if (size==1)
790e81bb599SAmlan Barua     {
7918ca4c034SAmlan Barua           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7928ca4c034SAmlan Barua           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7938ca4c034SAmlan Barua           printf("The values of sizes are %d and %d",vsize,vsize1);
7949496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7956971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7966971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7976971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7986971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
799b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
800e81bb599SAmlan Barua     }
801e81bb599SAmlan Barua 
802e81bb599SAmlan Barua  else{
803e81bb599SAmlan Barua 
8043c3a9c75SAmlan Barua  switch (ndim){
8053c3a9c75SAmlan Barua  case 1:
80664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
80764657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
80864657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
80964657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
81064657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
81164657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81264657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81364657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
81464657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
81564657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
81664657d84SAmlan Barua #else
817e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
81864657d84SAmlan Barua #endif
8193c3a9c75SAmlan Barua  break;
8203c3a9c75SAmlan Barua  case 2:
821bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
822bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
823bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
824bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
825bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
826bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
829bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
830bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
831bd59e6c6SAmlan Barua #else
8323c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
8333c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
8349496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
8359496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
8369496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
8379496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
8383c3a9c75SAmlan Barua 
8393c3a9c75SAmlan Barua       if (dim[1]%2==0)
8403c3a9c75SAmlan Barua         NM = dim[1]+2;
8413c3a9c75SAmlan Barua       else
8423c3a9c75SAmlan Barua         NM = dim[1]+1;
8433c3a9c75SAmlan Barua 
8443c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
8453c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
8463c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
8473c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
8485b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
8493c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
8503c3a9c75SAmlan Barua         }
8513c3a9c75SAmlan Barua      }
8523c3a9c75SAmlan Barua 
8533c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8543c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8553c3a9c75SAmlan Barua 
856f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
857f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
860b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
861b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
862b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
863b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
864bd59e6c6SAmlan Barua #endif
8659496c9aeSAmlan Barua  break;
8663c3a9c75SAmlan Barua 
8673c3a9c75SAmlan Barua  case 3:
868bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
869bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
870bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
871bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
872bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
873bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
876bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
877bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
878bd59e6c6SAmlan Barua #else
87951d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
88051d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
88151d1eed7SAmlan Barua 
8829496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
8839496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
88451d1eed7SAmlan Barua 
88551d1eed7SAmlan Barua       if (dim[2]%2==0)
88651d1eed7SAmlan Barua         NM = dim[2]+2;
88751d1eed7SAmlan Barua       else
88851d1eed7SAmlan Barua         NM = dim[2]+1;
88951d1eed7SAmlan Barua 
89051d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
89151d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
89251d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
89351d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
89451d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
89551d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
89651d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
89751d1eed7SAmlan Barua             }
89851d1eed7SAmlan Barua          }
89951d1eed7SAmlan Barua       }
90051d1eed7SAmlan Barua 
90151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
90251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
90351d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
90451d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
90551d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
90651d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
907b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
908b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
909b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
910b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
911bd59e6c6SAmlan Barua #endif
9129496c9aeSAmlan Barua  break;
9133c3a9c75SAmlan Barua 
9143c3a9c75SAmlan Barua  default:
915bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
916bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
917bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
918bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
919bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
920bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
923bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
924bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
925bd59e6c6SAmlan Barua #else
926e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
927e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
928e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
929e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
930e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
931e5d7f247SAmlan Barua 
932e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
933e5d7f247SAmlan Barua 
934e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
935e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
936e5d7f247SAmlan Barua 
937e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
938ba6e06d0SAmlan Barua         NM = 2;
939e5d7f247SAmlan Barua       else
940ba6e06d0SAmlan Barua         NM = 1;
941e5d7f247SAmlan Barua 
9426971673cSAmlan Barua       j = low;
9436971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
9446971673cSAmlan Barua          {
9456971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
9466971673cSAmlan Barua           indx2[i] = j;
9476971673cSAmlan Barua           if (k%dim[ndim-1]==0)
9486971673cSAmlan Barua             { j+=NM;}
9496971673cSAmlan Barua           j++;
9506971673cSAmlan Barua          }
9516971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9526971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9536971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
9546971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9556971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9566971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
960b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
961bd59e6c6SAmlan Barua #endif
9629496c9aeSAmlan Barua  break;
9633c3a9c75SAmlan Barua   }
964e81bb599SAmlan Barua  }
965*1abc6020SAmlan Barua  return(0);
9663c3a9c75SAmlan Barua }
9673c3a9c75SAmlan Barua EXTERN_C_END
9683c3a9c75SAmlan Barua 
9693c3a9c75SAmlan Barua #undef __FUNCT__
9703c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
9713c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
9723c3a9c75SAmlan Barua {
9733c3a9c75SAmlan Barua   PetscErrorCode ierr;
9743c3a9c75SAmlan Barua   PetscFunctionBegin;
9753c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9763c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9773c3a9c75SAmlan Barua }
97854efbe56SHong Zhang 
9795b3e41ffSAmlan Barua /*
9805b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
9815b3e41ffSAmlan Barua   Input A, x, y
9825b3e41ffSAmlan Barua   A - FFTW matrix
9835b3e41ffSAmlan Barua   x - FFTW vector
9845b3e41ffSAmlan Barua   y - PETSc vector that user can read
9855b3e41ffSAmlan Barua   Options Database Keys:
9865b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
9875b3e41ffSAmlan Barua 
9885b3e41ffSAmlan Barua    Level: intermediate
9895b3e41ffSAmlan Barua 
9903c3a9c75SAmlan Barua */
9913c3a9c75SAmlan Barua 
9923c3a9c75SAmlan Barua EXTERN_C_BEGIN
9933c3a9c75SAmlan Barua #undef __FUNCT__
9945b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
9955b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
9965b3e41ffSAmlan Barua {
9975b3e41ffSAmlan Barua   PetscErrorCode ierr;
9985b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
9995b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
10005b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
10019496c9aeSAmlan Barua   PetscInt       N=fft->N;
1002b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
10039496c9aeSAmlan Barua   PetscInt       low;
10049496c9aeSAmlan Barua   PetscInt       rank,size;
10055b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
10069496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10075b3e41ffSAmlan Barua   VecScatter     vecscat;
10085b3e41ffSAmlan Barua   IS             list1,list2;
10099496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10109496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
10119496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
10129496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
10139496c9aeSAmlan Barua   ptrdiff_t      temp;
10149496c9aeSAmlan Barua #endif
10159496c9aeSAmlan Barua 
10165b3e41ffSAmlan Barua 
10175b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
10185b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1019b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
10205b3e41ffSAmlan Barua 
1021e81bb599SAmlan Barua   if (size==1){
1022b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
10236971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
10246971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10256971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10266971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1027b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1028e81bb599SAmlan Barua   }
1029e81bb599SAmlan Barua   else{
1030e81bb599SAmlan Barua 
1031e81bb599SAmlan Barua   switch (ndim){
1032e81bb599SAmlan Barua   case 1:
103364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
103464657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
10359496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
10369496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
103764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
103864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
104164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
104264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
104364657d84SAmlan Barua #else
10446a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
104564657d84SAmlan Barua #endif
10465b3e41ffSAmlan Barua   break;
10475b3e41ffSAmlan Barua   case 2:
1048bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1049bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1050bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1051bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1052bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1053bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058bd59e6c6SAmlan Barua #else
10595b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
10605b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
10615b3e41ffSAmlan Barua 
10629496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
10639496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
10645b3e41ffSAmlan Barua 
10655b3e41ffSAmlan Barua       if (dim[1]%2==0)
10665b3e41ffSAmlan Barua         NM = dim[1]+2;
10675b3e41ffSAmlan Barua       else
10685b3e41ffSAmlan Barua         NM = dim[1]+1;
10695b3e41ffSAmlan Barua 
10705b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
10715b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
10725b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
10735b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
10745b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
10755b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
10765b3e41ffSAmlan Barua          }
10775b3e41ffSAmlan Barua        }
10785b3e41ffSAmlan Barua 
10795b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10805b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10815b3e41ffSAmlan Barua 
10825b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10835b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10845b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10855b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1086b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1087b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1088b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1089b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1090bd59e6c6SAmlan Barua #endif
10919496c9aeSAmlan Barua   break;
10925b3e41ffSAmlan Barua   case 3:
1093bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1094bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1095bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1096bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1097bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1098bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1100bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1101bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1102bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1103bd59e6c6SAmlan Barua #else
1104bd59e6c6SAmlan Barua 
110551d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
110651d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
110751d1eed7SAmlan Barua 
11089496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
11099496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
11109496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
11119496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
111251d1eed7SAmlan Barua 
111351d1eed7SAmlan Barua        if (dim[2]%2==0)
111451d1eed7SAmlan Barua         NM = dim[2]+2;
111551d1eed7SAmlan Barua        else
111651d1eed7SAmlan Barua         NM = dim[2]+1;
111751d1eed7SAmlan Barua 
111851d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
111951d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
112051d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
112151d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
112251d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
112351d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
112451d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
112551d1eed7SAmlan Barua              }
112651d1eed7SAmlan Barua           }
112751d1eed7SAmlan Barua        }
112851d1eed7SAmlan Barua 
112951d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
113051d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
113151d1eed7SAmlan Barua 
113251d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
113351d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
113451d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
113551d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1136b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1137b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1138b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1139b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1140bd59e6c6SAmlan Barua #endif
11419496c9aeSAmlan Barua   break;
11425b3e41ffSAmlan Barua   default:
1143bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1144bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1145bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1146bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1147bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1148bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1151bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1152bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1153bd59e6c6SAmlan Barua #else
1154ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1155ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1156ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1157ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1158ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1159ba6e06d0SAmlan Barua 
1160ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1161ba6e06d0SAmlan Barua 
1162ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1163ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1164ba6e06d0SAmlan Barua 
1165ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1166ba6e06d0SAmlan Barua          NM = 2;
1167ba6e06d0SAmlan Barua        else
1168ba6e06d0SAmlan Barua          NM = 1;
1169ba6e06d0SAmlan Barua 
1170ba6e06d0SAmlan Barua        j = low;
1171ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1172ba6e06d0SAmlan Barua           {
1173ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1174ba6e06d0SAmlan Barua            indx2[i] = j;
1175ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1176ba6e06d0SAmlan Barua              { j+=NM;}
1177ba6e06d0SAmlan Barua            j++;
1178ba6e06d0SAmlan Barua           }
1179ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1180ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1181ba6e06d0SAmlan Barua 
1182ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1183ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1184ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1185ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1186b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1187b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1188b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1189b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1190bd59e6c6SAmlan Barua #endif
11919496c9aeSAmlan Barua   break;
11925b3e41ffSAmlan Barua   }
1193e81bb599SAmlan Barua   }
11945b3e41ffSAmlan Barua   return 0;
11955b3e41ffSAmlan Barua }
11965b3e41ffSAmlan Barua EXTERN_C_END
11975b3e41ffSAmlan Barua 
11985b3e41ffSAmlan Barua EXTERN_C_BEGIN
11995b3e41ffSAmlan Barua #undef __FUNCT__
12003c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
12013c3a9c75SAmlan Barua /*
12023c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
12033c3a9c75SAmlan Barua   via the external package FFTW
12043c3a9c75SAmlan Barua   Options Database Keys:
12053c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
12063c3a9c75SAmlan Barua 
12073c3a9c75SAmlan Barua    Level: intermediate
12083c3a9c75SAmlan Barua 
12093c3a9c75SAmlan Barua */
12103c3a9c75SAmlan Barua 
1211b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1212b2b77a04SHong Zhang {
1213b2b77a04SHong Zhang   PetscErrorCode ierr;
1214b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1215b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1216b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1217b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1218b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1219b2b77a04SHong Zhang   PetscBool      flg;
12204f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
122111902ff2SHong Zhang   PetscMPIInt    size,rank;
12229496c9aeSAmlan Barua   ptrdiff_t      *pdim;
12239496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
12249496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12259496c9aeSAmlan Barua    ptrdiff_t     temp;
12268ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
12274f48bc67SAmlan Barua #else
12284f48bc67SAmlan Barua    PetscInt n1;
12299496c9aeSAmlan Barua #endif
12309496c9aeSAmlan Barua 
1231b2b77a04SHong Zhang 
1232b2b77a04SHong Zhang   PetscFunctionBegin;
1233b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
123411902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
123554dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1236c92418ccSAmlan Barua 
123711902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
123811902ff2SHong Zhang   pdim[0] = dim[0];
12398ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12408ca4c034SAmlan Barua   tot_dim = 2*dim[0];
12418ca4c034SAmlan Barua #endif
124211902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
124311902ff2SHong Zhang      {
12446882372aSHong Zhang           partial_dim *= dim[ctr];
124511902ff2SHong Zhang           pdim[ctr] = dim[ctr];
12468ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12478ca4c034SAmlan Barua           if (ctr==ndim-1)
12488ca4c034SAmlan Barua             tot_dim *= (dim[ctr]/2+1);
12498ca4c034SAmlan Barua           else
12508ca4c034SAmlan Barua             tot_dim *= dim[ctr];
12518ca4c034SAmlan Barua #endif
12526882372aSHong Zhang      }
12533c3a9c75SAmlan Barua 
1254b2b77a04SHong Zhang   if (size == 1) {
1255e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1256b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1257b2b77a04SHong Zhang     n = N;
1258e81bb599SAmlan Barua #else
1259e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1260e81bb599SAmlan Barua     n = tot_dim;
1261e81bb599SAmlan Barua #endif
1262e81bb599SAmlan Barua 
1263b2b77a04SHong Zhang   } else {
1264*1abc6020SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;
1265b2b77a04SHong Zhang     switch (ndim){
1266b2b77a04SHong Zhang     case 1:
12673c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12683c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1269e5d7f247SAmlan Barua #else
12709496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
12716882372aSHong Zhang       n = (PetscInt)local_n0;
12729496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
12739496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1274e5d7f247SAmlan Barua #endif
1275b2b77a04SHong Zhang       break;
1276b2b77a04SHong Zhang     case 2:
12775b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1278b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1279b2b77a04SHong Zhang       /*
1280b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1281b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1282b2b77a04SHong Zhang        */
1283b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1284b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12855b3e41ffSAmlan Barua #else
12865b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
12875b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1288c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12895b3e41ffSAmlan Barua #endif
1290b2b77a04SHong Zhang       break;
1291b2b77a04SHong Zhang     case 3:
129251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
129351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
12946882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
12956882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
129651d1eed7SAmlan Barua #else
129751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
129851d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1299c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
130051d1eed7SAmlan Barua #endif
1301b2b77a04SHong Zhang       break;
1302b2b77a04SHong Zhang     default:
1303b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
130411902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
13056882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
13066882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1307b3a17365SAmlan Barua #else
1308b3a17365SAmlan Barua       temp = pdim[ndim-1];
1309b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1310b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1311b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1312b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1313b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1314c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1315b3a17365SAmlan Barua #endif
1316b2b77a04SHong Zhang       break;
1317b2b77a04SHong Zhang     }
1318b2b77a04SHong Zhang   }
1319b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1320b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1321b2b77a04SHong Zhang   fft->data = (void*)fftw;
1322b2b77a04SHong Zhang 
1323b2b77a04SHong Zhang   fft->n           = n;
1324c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1325e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1326c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1327b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1328c92418ccSAmlan Barua 
1329b2b77a04SHong Zhang   fftw->p_forward  = 0;
1330b2b77a04SHong Zhang   fftw->p_backward = 0;
1331b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1332b2b77a04SHong Zhang 
1333b2b77a04SHong Zhang   if (size == 1){
1334b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1335b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1336b2b77a04SHong Zhang   } else {
1337b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1338b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1339b2b77a04SHong Zhang   }
1340b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
13414be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1342b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
13434be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
13443c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
13455b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1346b2b77a04SHong Zhang 
1347b2b77a04SHong Zhang   /* get runtime options */
1348b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1349b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1350b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1351b2b77a04SHong Zhang   PetscOptionsEnd();
1352b2b77a04SHong Zhang   PetscFunctionReturn(0);
1353b2b77a04SHong Zhang }
1354b2b77a04SHong Zhang EXTERN_C_END
13553c3a9c75SAmlan Barua 
13563c3a9c75SAmlan Barua 
13573c3a9c75SAmlan Barua 
13583c3a9c75SAmlan Barua 
1359