xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 8ca4c034b1bc534a44cb6bf3488e1c5b0979d612)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
3033c3a9c75SAmlan Barua 
3044f7415efSAmlan Barua #undef __FUNCT__
3054be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3064be45526SHong Zhang /*@
307b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3084f7415efSAmlan Barua      parallel layout determined by FFTW
3094f7415efSAmlan Barua 
3104f7415efSAmlan Barua    Collective on Mat
3114f7415efSAmlan Barua 
3124f7415efSAmlan Barua    Input Parameter:
3134f7415efSAmlan Barua .  mat - the matrix
3144f7415efSAmlan Barua 
3154f7415efSAmlan Barua    Output Parameter:
3164f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3174f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3184f7415efSAmlan Barua 
3194f7415efSAmlan Barua   Level: advanced
3204f7415efSAmlan Barua 
3214f7415efSAmlan Barua .seealso: MatCreateFFTW()
3224be45526SHong Zhang @*/
3234be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3244be45526SHong Zhang {
3254be45526SHong Zhang   PetscErrorCode ierr;
3264be45526SHong Zhang   PetscFunctionBegin;
3274be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3284be45526SHong Zhang   PetscFunctionReturn(0);
3294be45526SHong Zhang }
3304be45526SHong Zhang 
3314f7415efSAmlan Barua EXTERN_C_BEGIN
3324be45526SHong Zhang #undef __FUNCT__
3334be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
3344be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
3354f7415efSAmlan Barua {
3364f7415efSAmlan Barua   PetscErrorCode ierr;
3374f7415efSAmlan Barua   PetscMPIInt    size,rank;
3384f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
3394f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
3404f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
3419496c9aeSAmlan Barua   PetscInt       N=fft->N;
3424f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
3434f7415efSAmlan Barua 
3444f7415efSAmlan Barua   PetscFunctionBegin;
3454f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
3464f7415efSAmlan Barua   PetscValidType(A,1);
3474f7415efSAmlan Barua 
3484f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
3494f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
3504f7415efSAmlan Barua   if (size == 1){ /* sequential case */
3514f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3524f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
3534f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
3544f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
3554f7415efSAmlan Barua #else
356*8ca4c034SAmlan Barua //    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
357*8ca4c034SAmlan Barua //    if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
358*8ca4c034SAmlan Barua //    if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
359*8ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
360*8ca4c034SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
361*8ca4c034SAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
3624f7415efSAmlan Barua #endif
3634f7415efSAmlan Barua   } else {
3644f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
3659496c9aeSAmlan Barua     ptrdiff_t      local_n1;
3669496c9aeSAmlan Barua     fftw_complex   *data_fout;
3679496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
3689496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
3699496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
3709496c9aeSAmlan Barua #else
3714f7415efSAmlan Barua     double         *data_finr,*data_boutr;
3729496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
3739496c9aeSAmlan Barua     ptrdiff_t      temp;
3749496c9aeSAmlan Barua #endif
3759496c9aeSAmlan Barua 
3764f7415efSAmlan Barua     switch (ndim){
3774f7415efSAmlan Barua           case 1:
37857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
37957625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
38057625b84SAmlan Barua #else
3819496c9aeSAmlan Barua                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
38257625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
38357625b84SAmlan Barua                  if (fin) {
38457625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
38557625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
38657625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
38757625b84SAmlan Barua                          }
38857625b84SAmlan Barua                  if (fout) {
38957625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
39057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
39157625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
39257625b84SAmlan Barua                          }
39357625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
39457625b84SAmlan Barua                  if (bout) {
39557625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
39657625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
39757625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
39857625b84SAmlan Barua                          }
39957625b84SAmlan Barua           break;
40057625b84SAmlan Barua #endif
4014f7415efSAmlan Barua           case 2:
4024f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4034f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4044f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4054f7415efSAmlan Barua                  if (fin) {
4064f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4074f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4084f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4094f7415efSAmlan Barua                           }
4104f7415efSAmlan Barua                  if (bout) {
4114f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4124f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4134f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4144f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4154f7415efSAmlan Barua                           }
416c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
41757625b84SAmlan Barua                  if (fout) {
41857625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4199496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
42057625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
42157625b84SAmlan Barua                            }
4224f7415efSAmlan Barua #else
4234f7415efSAmlan Barua       /* Get local size */
4244f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4254f7415efSAmlan Barua                  if (fin) {
4264f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4274f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4284f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4294f7415efSAmlan Barua                           }
4304f7415efSAmlan Barua                  if (fout) {
4314f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4324f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4334f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4344f7415efSAmlan Barua                            }
4354f7415efSAmlan Barua                  if (bout) {
4364f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4374f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4384f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4394f7415efSAmlan Barua                           }
4404f7415efSAmlan Barua #endif
4414f7415efSAmlan Barua           break;
4424f7415efSAmlan Barua           case 3:
4434f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4444f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4454f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
4464f7415efSAmlan Barua                  if (fin) {
4474f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4484f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4494f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4504f7415efSAmlan Barua                          }
4514f7415efSAmlan Barua                  if (bout) {
4524f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4534f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4544f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4554f7415efSAmlan Barua                           }
456c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
45757625b84SAmlan Barua                  if (fout) {
45857625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45957625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
46057625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
46157625b84SAmlan Barua                           }
4624f7415efSAmlan Barua #else
4630c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
4640c9b18e4SAmlan Barua                  if (fin) {
4650c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4660c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4670c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4680c9b18e4SAmlan Barua                          }
4690c9b18e4SAmlan Barua                  if (fout) {
4700c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4710c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4720c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4730c9b18e4SAmlan Barua                          }
4740c9b18e4SAmlan Barua                  if (bout) {
4750c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4760c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
4770c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4780c9b18e4SAmlan Barua                          }
4794f7415efSAmlan Barua #endif
4804f7415efSAmlan Barua           break;
4814f7415efSAmlan Barua           default:
4824f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4834f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
4844f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
4854f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4864f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
4874f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
4884f7415efSAmlan Barua 
4894f7415efSAmlan Barua                  if (fin) {
4904f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4914f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4924f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4934f7415efSAmlan Barua                         }
4944f7415efSAmlan Barua                  if (bout) {
4954f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4964f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4979496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4984f7415efSAmlan Barua                         }
499c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
500c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
501c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
50257625b84SAmlan Barua                  if (fout) {
50357625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
50457625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
50557625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
50657625b84SAmlan Barua                         }
5074f7415efSAmlan Barua #else
5080c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5090c9b18e4SAmlan Barua                 if (fin) {
5100c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5110c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5120c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5130c9b18e4SAmlan Barua                        }
5140c9b18e4SAmlan Barua                 if (fout) {
5150c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5160c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5170c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5180c9b18e4SAmlan Barua                        }
5190c9b18e4SAmlan Barua                 if (bout) {
5200c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5210c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5220c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5230c9b18e4SAmlan Barua                        }
5244f7415efSAmlan Barua #endif
5254f7415efSAmlan Barua             break;
5264f7415efSAmlan Barua           }
5274f7415efSAmlan Barua   }
5284f7415efSAmlan Barua   PetscFunctionReturn(0);
5294f7415efSAmlan Barua }
5304f7415efSAmlan Barua EXTERN_C_END
5314f7415efSAmlan Barua 
532c92418ccSAmlan Barua #undef __FUNCT__
533b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
534b2b77a04SHong Zhang /*
535b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
536b2b77a04SHong Zhang      parallel layout determined by FFTW
537b2b77a04SHong Zhang 
538b2b77a04SHong Zhang    Collective on Mat
539b2b77a04SHong Zhang 
540b2b77a04SHong Zhang    Input Parameter:
541b2b77a04SHong Zhang .  mat - the matrix
542b2b77a04SHong Zhang 
543b2b77a04SHong Zhang    Output Parameter:
544b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
545b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
546b2b77a04SHong Zhang 
547b2b77a04SHong Zhang   Level: advanced
548b2b77a04SHong Zhang 
549b2b77a04SHong Zhang .seealso: MatCreateFFTW()
550b2b77a04SHong Zhang */
551b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
552b2b77a04SHong Zhang {
553b2b77a04SHong Zhang   PetscErrorCode ierr;
554b2b77a04SHong Zhang   PetscMPIInt    size,rank;
555b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
556b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
557c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
5589496c9aeSAmlan Barua   PetscInt       N=fft->N;
559e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
560b2b77a04SHong Zhang 
561b2b77a04SHong Zhang   PetscFunctionBegin;
562b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
563b2b77a04SHong Zhang   PetscValidType(A,1);
564b2b77a04SHong Zhang 
565b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
566b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
567b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
568e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
569b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
570b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
571e5d7f247SAmlan Barua #else
572e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
573e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
574e81bb599SAmlan Barua #endif
575b2b77a04SHong Zhang   } else {        /* mpi case */
576b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
5776882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
578b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
5799496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
58051d1eed7SAmlan Barua     double         *data_finr ;
581b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
5829496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
5839496c9aeSAmlan Barua #endif
5849496c9aeSAmlan Barua 
585c92418ccSAmlan Barua //    PetscInt ctr;
586c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
587c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
588c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
58911902ff2SHong Zhang 
590c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
591f76f76beSAmlan Barua //        {k
592c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
593c92418ccSAmlan Barua //       }
594b2b77a04SHong Zhang 
595f76f76beSAmlan Barua 
596f76f76beSAmlan Barua 
597b2b77a04SHong Zhang     switch (ndim){
598b2b77a04SHong Zhang     case 1:
5996882372aSHong Zhang       /* Get local size */
600e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
601c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6026882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6039496c9aeSAmlan Barua       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
6046882372aSHong Zhang       if (fin) {
6056882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6066882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6076882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6086882372aSHong Zhang       }
6096882372aSHong Zhang       if (fout) {
6106882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
61157625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6126882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6136882372aSHong Zhang       }
614b2b77a04SHong Zhang       break;
615b2b77a04SHong Zhang     case 2:
6163c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6173c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6183c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6193c3a9c75SAmlan Barua       if (fin) {
6203c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
62154dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6223c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
62309dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6243c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6253c3a9c75SAmlan Barua       }
62657625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
62757625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
62857625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
6293c3a9c75SAmlan Barua       if (fout) {
63009dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
63109dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6323c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6333c3a9c75SAmlan Barua       }
634f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
6353c3a9c75SAmlan Barua 
6363c3a9c75SAmlan Barua #else
637b2b77a04SHong Zhang       /* Get local size */
63864657d84SAmlan Barua      //printf("Hope this does not come here");
639b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
640b2b77a04SHong Zhang       if (fin) {
641b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
642b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
643b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
644b2b77a04SHong Zhang       }
645b2b77a04SHong Zhang       if (fout) {
646b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
647b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
648b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
649b2b77a04SHong Zhang       }
65064657d84SAmlan Barua      //printf("Hope this does not come here");
6513c3a9c75SAmlan Barua #endif
652b2b77a04SHong Zhang       break;
653b2b77a04SHong Zhang     case 3:
6546882372aSHong Zhang       /* Get local size */
6553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
65651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
65751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
65851d1eed7SAmlan Barua       if (fin) {
65951d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
66051d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
66151d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
66251d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
66351d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
66451d1eed7SAmlan Barua       }
66557625b84SAmlan Barua       printf("The value is %ld",local_n1);
66657625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
66751d1eed7SAmlan Barua       if (fout) {
66851d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
66951d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
67051d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
67151d1eed7SAmlan Barua       }
67251d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
67351d1eed7SAmlan Barua 
67451d1eed7SAmlan Barua 
6753c3a9c75SAmlan Barua #else
6766882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
67711902ff2SHong Zhang //      printf("The quantity n is %d",n);
6786882372aSHong Zhang       if (fin) {
6796882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6806882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6816882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6826882372aSHong Zhang       }
6836882372aSHong Zhang       if (fout) {
6846882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6856882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6866882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6876882372aSHong Zhang       }
6883c3a9c75SAmlan Barua #endif
689b2b77a04SHong Zhang       break;
690b2b77a04SHong Zhang     default:
6916882372aSHong Zhang       /* Get local size */
6923c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
693b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
694b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
695b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
696b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
697b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
698b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
699b3a17365SAmlan Barua       if (fin) {
700b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
701b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
702b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
703b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
704b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
705b3a17365SAmlan Barua       }
70657625b84SAmlan Barua       printf("The value is %ld. Global length is %d \n",local_n1,N1);
70757625b84SAmlan Barua       temp = fftw->partial_dim;
70857625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
70957625b84SAmlan Barua 
71057625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
711b3a17365SAmlan Barua       if (fout) {
712b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
71357625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
714b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
715b3a17365SAmlan Barua       }
716b3a17365SAmlan Barua 
7173c3a9c75SAmlan Barua #else
718c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
71911902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
72011902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
72111902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
72211902ff2SHong Zhang //      for(i=0;i<ndim;i++)
72311902ff2SHong Zhang //         {
72411902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
72511902ff2SHong Zhang //         }
72611902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
72711902ff2SHong Zhang //      printf("The quantity n is %d",n);
7286882372aSHong Zhang       if (fin) {
7296882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7306882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7316882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7326882372aSHong Zhang       }
7336882372aSHong Zhang       if (fout) {
7346882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7356882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7366882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7376882372aSHong Zhang       }
7383c3a9c75SAmlan Barua #endif
739b2b77a04SHong Zhang       break;
740b2b77a04SHong Zhang     }
741b2b77a04SHong Zhang   }
74254dd5118SAmlan Barua //  if (fin){
74354dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
74454dd5118SAmlan Barua //  }
74554dd5118SAmlan Barua //  if (fout){
74654dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
74754dd5118SAmlan Barua //  }
748b2b77a04SHong Zhang   PetscFunctionReturn(0);
749b2b77a04SHong Zhang }
750b2b77a04SHong Zhang 
751b2b77a04SHong Zhang #undef __FUNCT__
7523c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
7533c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
7543c3a9c75SAmlan Barua {
7553c3a9c75SAmlan Barua   PetscErrorCode ierr;
7563c3a9c75SAmlan Barua   PetscFunctionBegin;
7573c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7583c3a9c75SAmlan Barua   PetscFunctionReturn(0);
7593c3a9c75SAmlan Barua }
76054efbe56SHong Zhang 
761b2b77a04SHong Zhang /*
7629496c9aeSAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
7639496c9aeSAmlan Barua       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
7649496c9aeSAmlan Barua       changing dimension.
7653c3a9c75SAmlan Barua   Input A, x, y
7663c3a9c75SAmlan Barua   A - FFTW matrix
76754dd5118SAmlan Barua   x - user data
768b2b77a04SHong Zhang   Options Database Keys:
769b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
770b2b77a04SHong Zhang 
771b2b77a04SHong Zhang    Level: intermediate
772b2b77a04SHong Zhang 
773b2b77a04SHong Zhang */
7743c3a9c75SAmlan Barua 
7753c3a9c75SAmlan Barua EXTERN_C_BEGIN
7763c3a9c75SAmlan Barua #undef __FUNCT__
7773c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
7783c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
7793c3a9c75SAmlan Barua {
7803c3a9c75SAmlan Barua   PetscErrorCode ierr;
7813c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
7823c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
7833c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7849496c9aeSAmlan Barua   PetscInt       N=fft->N;
785b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
7869496c9aeSAmlan Barua   PetscInt       low;
787*8ca4c034SAmlan Barua   PetscInt       rank,size,vsize,vsize1;
7883c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
7899496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7903c3a9c75SAmlan Barua   VecScatter     vecscat;
7913c3a9c75SAmlan Barua   IS             list1,list2;
7929496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7939496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
7949496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
7959496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
7969496c9aeSAmlan Barua   ptrdiff_t      temp;
7979496c9aeSAmlan Barua #endif
7983c3a9c75SAmlan Barua 
799f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
800f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8013c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
8023c3a9c75SAmlan Barua 
803e81bb599SAmlan Barua   if (size==1)
804e81bb599SAmlan Barua     {
8057536937bSAmlan Barua /*     switch (ndim){
806e81bb599SAmlan Barua      case 1:
807e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
808e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
809e81bb599SAmlan Barua              {
810e81bb599SAmlan Barua               indx1[i] = i;
811e81bb599SAmlan Barua              }
812e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
813e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
814e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
817b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
818b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
819e81bb599SAmlan Barua      break;
820e81bb599SAmlan Barua 
821e81bb599SAmlan Barua      case 2:
822e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
823e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
824e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
825e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
826e81bb599SAmlan Barua              }
827e81bb599SAmlan Barua           }
828e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
829e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
830e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
833b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
834b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
835e81bb599SAmlan Barua           break;
836e81bb599SAmlan Barua      case 3:
837e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
838e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
839e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
840e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
841e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
842e81bb599SAmlan Barua                 }
843e81bb599SAmlan Barua              }
844e81bb599SAmlan Barua           }
845e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
846e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
847e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
849e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
850b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
851b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
852e81bb599SAmlan Barua           break;
853e81bb599SAmlan Barua      default:
8547536937bSAmlan Barua */
855*8ca4c034SAmlan Barua           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
856*8ca4c034SAmlan Barua           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
857*8ca4c034SAmlan Barua           printf("The values of sizes are %d and %d",vsize,vsize1);
8589496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
8596971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
8606971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8616971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8626971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
863b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
8647536937bSAmlan Barua  //         break;
8657536937bSAmlan Barua   //    }
866e81bb599SAmlan Barua     }
867e81bb599SAmlan Barua 
868e81bb599SAmlan Barua  else{
869e81bb599SAmlan Barua 
8703c3a9c75SAmlan Barua  switch (ndim){
8713c3a9c75SAmlan Barua  case 1:
87264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87364657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
87464657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
87564657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
87664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
87764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
88064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
88164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
88264657d84SAmlan Barua #else
883e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
88464657d84SAmlan Barua #endif
8853c3a9c75SAmlan Barua  break;
8863c3a9c75SAmlan Barua  case 2:
887bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
888bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
889bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
890bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
891bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
892bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
895bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
896bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
897bd59e6c6SAmlan Barua #else
8983c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
8993c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9009496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9019496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9029496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9039496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9043c3a9c75SAmlan Barua 
9053c3a9c75SAmlan Barua       if (dim[1]%2==0)
9063c3a9c75SAmlan Barua         NM = dim[1]+2;
9073c3a9c75SAmlan Barua       else
9083c3a9c75SAmlan Barua         NM = dim[1]+1;
9093c3a9c75SAmlan Barua 
9103c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9113c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9123c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9133c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9145b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9153c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9163c3a9c75SAmlan Barua         }
9173c3a9c75SAmlan Barua      }
9183c3a9c75SAmlan Barua 
9193c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9203c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9213c3a9c75SAmlan Barua 
922f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
923f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
926b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
927b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
928b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
929b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
930bd59e6c6SAmlan Barua #endif
9319496c9aeSAmlan Barua  break;
9323c3a9c75SAmlan Barua 
9333c3a9c75SAmlan Barua  case 3:
934bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
935bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
936bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
937bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
938bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
939bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
940bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
941bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
944bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
945bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
946bd59e6c6SAmlan Barua #else
94751d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
94851d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
94951d1eed7SAmlan Barua 
9509496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9519496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9529496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
9539496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
95451d1eed7SAmlan Barua 
95551d1eed7SAmlan Barua       if (dim[2]%2==0)
95651d1eed7SAmlan Barua         NM = dim[2]+2;
95751d1eed7SAmlan Barua       else
95851d1eed7SAmlan Barua         NM = dim[2]+1;
95951d1eed7SAmlan Barua 
96051d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
96151d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
96251d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
96351d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
96451d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
96551d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
96651d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
96751d1eed7SAmlan Barua             }
96851d1eed7SAmlan Barua          }
96951d1eed7SAmlan Barua       }
97051d1eed7SAmlan Barua 
97151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
97251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
97351d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
97451d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97551d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
97651d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
980b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
981bd59e6c6SAmlan Barua #endif
9829496c9aeSAmlan Barua  break;
9833c3a9c75SAmlan Barua 
9843c3a9c75SAmlan Barua  default:
985bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
986bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
987bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
988bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
989bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
990bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995bd59e6c6SAmlan Barua #else
996e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
997e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
998e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
999e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1000e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1001e5d7f247SAmlan Barua 
1002e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1003e5d7f247SAmlan Barua 
1004e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1005e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1006e5d7f247SAmlan Barua 
1007e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1008ba6e06d0SAmlan Barua         NM = 2;
1009e5d7f247SAmlan Barua       else
1010ba6e06d0SAmlan Barua         NM = 1;
1011e5d7f247SAmlan Barua 
10126971673cSAmlan Barua       j = low;
10136971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
10146971673cSAmlan Barua          {
10156971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
10166971673cSAmlan Barua           indx2[i] = j;
10176971673cSAmlan Barua           if (k%dim[ndim-1]==0)
10186971673cSAmlan Barua             { j+=NM;}
10196971673cSAmlan Barua           j++;
10206971673cSAmlan Barua          }
10216971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10226971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10236971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
10246971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10256971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10266971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1027b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1028b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1029b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1030b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1031bd59e6c6SAmlan Barua #endif
10329496c9aeSAmlan Barua  break;
10333c3a9c75SAmlan Barua   }
1034e81bb599SAmlan Barua  }
10353c3a9c75SAmlan Barua 
10363c3a9c75SAmlan Barua  return 0;
10373c3a9c75SAmlan Barua }
10383c3a9c75SAmlan Barua EXTERN_C_END
10393c3a9c75SAmlan Barua 
10403c3a9c75SAmlan Barua #undef __FUNCT__
10413c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
10423c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
10433c3a9c75SAmlan Barua {
10443c3a9c75SAmlan Barua   PetscErrorCode ierr;
10453c3a9c75SAmlan Barua   PetscFunctionBegin;
10463c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
10473c3a9c75SAmlan Barua   PetscFunctionReturn(0);
10483c3a9c75SAmlan Barua }
104954efbe56SHong Zhang 
10505b3e41ffSAmlan Barua /*
10515b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
10525b3e41ffSAmlan Barua   Input A, x, y
10535b3e41ffSAmlan Barua   A - FFTW matrix
10545b3e41ffSAmlan Barua   x - FFTW vector
10555b3e41ffSAmlan Barua   y - PETSc vector that user can read
10565b3e41ffSAmlan Barua   Options Database Keys:
10575b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
10585b3e41ffSAmlan Barua 
10595b3e41ffSAmlan Barua    Level: intermediate
10605b3e41ffSAmlan Barua 
10613c3a9c75SAmlan Barua */
10623c3a9c75SAmlan Barua 
10633c3a9c75SAmlan Barua EXTERN_C_BEGIN
10643c3a9c75SAmlan Barua #undef __FUNCT__
10655b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
10665b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
10675b3e41ffSAmlan Barua {
10685b3e41ffSAmlan Barua   PetscErrorCode ierr;
10695b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
10705b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
10715b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
10729496c9aeSAmlan Barua   PetscInt       N=fft->N;
1073b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
10749496c9aeSAmlan Barua   PetscInt       low;
10759496c9aeSAmlan Barua   PetscInt       rank,size;
10765b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
10779496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
10785b3e41ffSAmlan Barua   VecScatter     vecscat;
10795b3e41ffSAmlan Barua   IS             list1,list2;
10809496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10819496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
10829496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
10839496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
10849496c9aeSAmlan Barua   ptrdiff_t      temp;
10859496c9aeSAmlan Barua #endif
10869496c9aeSAmlan Barua 
10875b3e41ffSAmlan Barua 
10885b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
10895b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1090b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
10915b3e41ffSAmlan Barua 
1092e81bb599SAmlan Barua   if (size==1){
10937536937bSAmlan Barua /*
10945b3e41ffSAmlan Barua     switch (ndim){
10955b3e41ffSAmlan Barua     case 1:
1096e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1097e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1098e81bb599SAmlan Barua              {
1099e81bb599SAmlan Barua               indx1[i] = i;
1100e81bb599SAmlan Barua              }
1101e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1102e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1103e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1104e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1106b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1107b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1108e81bb599SAmlan Barua           break;
1109e81bb599SAmlan Barua 
1110e81bb599SAmlan Barua     case 2:
1111e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1112e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1113e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1114e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1115e81bb599SAmlan Barua              }
1116e81bb599SAmlan Barua           }
1117e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1118e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1119e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1122b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1123b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1124e81bb599SAmlan Barua          break;
1125e81bb599SAmlan Barua     case 3:
1126e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1127e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1128e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1129e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1130e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1131e81bb599SAmlan Barua                }
1132e81bb599SAmlan Barua             }
1133e81bb599SAmlan Barua          }
1134e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1135e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1136e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1139b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1140b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1141e81bb599SAmlan Barua          break;
1142e81bb599SAmlan Barua     default:
11437536937bSAmlan Barua */
1144b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
11456971673cSAmlan Barua     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
11466971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
11476971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11486971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
11496971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1150b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
11517536937bSAmlan Barua   //       break;
11527536937bSAmlan Barua    // }
1153e81bb599SAmlan Barua   }
1154e81bb599SAmlan Barua   else{
1155e81bb599SAmlan Barua 
1156e81bb599SAmlan Barua  switch (ndim){
1157e81bb599SAmlan Barua  case 1:
115864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
115964657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
11609496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
11619496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
116264657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
116364657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
116464657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
116564657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116664657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116764657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
116864657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
116964657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
117064657d84SAmlan Barua #else
11716a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
117264657d84SAmlan Barua #endif
11735b3e41ffSAmlan Barua  break;
11745b3e41ffSAmlan Barua  case 2:
1175bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1176bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1177bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1178bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1179bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1180bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1181bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1183bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1184bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1185bd59e6c6SAmlan Barua #else
11865b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
11875b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
11885b3e41ffSAmlan Barua 
11899496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
11909496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
11915b3e41ffSAmlan Barua 
11925b3e41ffSAmlan Barua       if (dim[1]%2==0)
11935b3e41ffSAmlan Barua         NM = dim[1]+2;
11945b3e41ffSAmlan Barua       else
11955b3e41ffSAmlan Barua         NM = dim[1]+1;
11965b3e41ffSAmlan Barua 
11975b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
11985b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
11995b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12005b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12015b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
12025b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
12035b3e41ffSAmlan Barua          }
12045b3e41ffSAmlan Barua        }
12055b3e41ffSAmlan Barua 
12065b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
12075b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
12085b3e41ffSAmlan Barua 
12095b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
12105b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12115b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12125b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1213b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1214b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1215b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1216b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1217bd59e6c6SAmlan Barua #endif
12189496c9aeSAmlan Barua  break;
12195b3e41ffSAmlan Barua  case 3:
1220bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1221bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1222bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1223bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1224bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1225bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1227bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1228bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1229bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1230bd59e6c6SAmlan Barua #else
1231bd59e6c6SAmlan Barua 
123251d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
123351d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
123451d1eed7SAmlan Barua 
12359496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
12369496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
12379496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
12389496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
123951d1eed7SAmlan Barua 
124051d1eed7SAmlan Barua        if (dim[2]%2==0)
124151d1eed7SAmlan Barua         NM = dim[2]+2;
124251d1eed7SAmlan Barua        else
124351d1eed7SAmlan Barua         NM = dim[2]+1;
124451d1eed7SAmlan Barua 
124551d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
124651d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
124751d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
124851d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
124951d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
125051d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
125151d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
125251d1eed7SAmlan Barua              }
125351d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
125451d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
125551d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
125651d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
125751d1eed7SAmlan Barua           }
125851d1eed7SAmlan Barua        }
125951d1eed7SAmlan Barua 
126051d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
126151d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
126251d1eed7SAmlan Barua 
126351d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
126451d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126551d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126651d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1267b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1268b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1269b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1270b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1271bd59e6c6SAmlan Barua #endif
12729496c9aeSAmlan Barua  break;
12735b3e41ffSAmlan Barua  default:
1274bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1275bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1276bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1277bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1278bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1279bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1280bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1281bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1284bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1285bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1286bd59e6c6SAmlan Barua #else
1287ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1288ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1289ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1290ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1291ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1292ba6e06d0SAmlan Barua 
1293ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1294ba6e06d0SAmlan Barua 
1295ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1296ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1297ba6e06d0SAmlan Barua 
1298ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1299ba6e06d0SAmlan Barua          NM = 2;
1300ba6e06d0SAmlan Barua        else
1301ba6e06d0SAmlan Barua          NM = 1;
1302ba6e06d0SAmlan Barua 
1303ba6e06d0SAmlan Barua        j = low;
1304ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1305ba6e06d0SAmlan Barua           {
1306ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1307ba6e06d0SAmlan Barua            indx2[i] = j;
1308ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1309ba6e06d0SAmlan Barua              { j+=NM;}
1310ba6e06d0SAmlan Barua            j++;
1311ba6e06d0SAmlan Barua           }
1312ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1313ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1314ba6e06d0SAmlan Barua 
1315ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1316ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1319b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1320b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1321b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1322b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1323bd59e6c6SAmlan Barua #endif
13249496c9aeSAmlan Barua  break;
13255b3e41ffSAmlan Barua  }
1326e81bb599SAmlan Barua  }
13275b3e41ffSAmlan Barua  return 0;
13285b3e41ffSAmlan Barua }
13295b3e41ffSAmlan Barua EXTERN_C_END
13305b3e41ffSAmlan Barua 
13315b3e41ffSAmlan Barua EXTERN_C_BEGIN
13325b3e41ffSAmlan Barua #undef __FUNCT__
13333c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
13343c3a9c75SAmlan Barua /*
13353c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
13363c3a9c75SAmlan Barua   via the external package FFTW
13373c3a9c75SAmlan Barua   Options Database Keys:
13383c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
13393c3a9c75SAmlan Barua 
13403c3a9c75SAmlan Barua    Level: intermediate
13413c3a9c75SAmlan Barua 
13423c3a9c75SAmlan Barua */
13433c3a9c75SAmlan Barua 
1344b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1345b2b77a04SHong Zhang {
1346b2b77a04SHong Zhang   PetscErrorCode ierr;
1347b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1348b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1349b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1350b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1351b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1352b2b77a04SHong Zhang   PetscBool      flg;
13534f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
135411902ff2SHong Zhang   PetscMPIInt    size,rank;
13559496c9aeSAmlan Barua   ptrdiff_t      *pdim;
13569496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
13579496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
13589496c9aeSAmlan Barua    ptrdiff_t     temp;
1359*8ca4c034SAmlan Barua    PetscInt      N1,tot_dim;
13604f48bc67SAmlan Barua #else
13614f48bc67SAmlan Barua    PetscInt n1;
13629496c9aeSAmlan Barua #endif
13639496c9aeSAmlan Barua 
1364b2b77a04SHong Zhang 
1365b2b77a04SHong Zhang   PetscFunctionBegin;
1366b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
136711902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
136854dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1369c92418ccSAmlan Barua 
137011902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137111902ff2SHong Zhang   pdim[0] = dim[0];
1372*8ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1373*8ca4c034SAmlan Barua   tot_dim = 2*dim[0];
1374*8ca4c034SAmlan Barua #endif
137511902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
137611902ff2SHong Zhang      {
13776882372aSHong Zhang           partial_dim *= dim[ctr];
137811902ff2SHong Zhang           pdim[ctr] = dim[ctr];
1379*8ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1380*8ca4c034SAmlan Barua           if (ctr==ndim-1)
1381*8ca4c034SAmlan Barua             tot_dim *= (dim[ctr]/2+1);
1382*8ca4c034SAmlan Barua           else
1383*8ca4c034SAmlan Barua             tot_dim *= dim[ctr];
1384*8ca4c034SAmlan Barua #endif
13856882372aSHong Zhang      }
13863c3a9c75SAmlan Barua 
1387b2b77a04SHong Zhang   if (size == 1) {
1388e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1389b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1390b2b77a04SHong Zhang     n = N;
1391e81bb599SAmlan Barua #else
1392*8ca4c034SAmlan Barua     //printf("The value of N is %d\n",N);
1393*8ca4c034SAmlan Barua     //tot_dim = 2*N*(dim[ndim-1]/2+1);
1394*8ca4c034SAmlan Barua     //tot_dim = ((long int)tot_dim)/dim[ndim-1];
1395*8ca4c034SAmlan Barua     //printf("The value of is %ld and %d\n",tot_dim,dim[ndim-1]);
1396e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1397e81bb599SAmlan Barua     n = tot_dim;
1398e81bb599SAmlan Barua #endif
1399e81bb599SAmlan Barua 
1400b2b77a04SHong Zhang   } else {
14019496c9aeSAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1402b2b77a04SHong Zhang     switch (ndim){
1403b2b77a04SHong Zhang     case 1:
14043c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14053c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1406e5d7f247SAmlan Barua #else
14079496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
14086882372aSHong Zhang       n = (PetscInt)local_n0;
14099496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
14109496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1411e5d7f247SAmlan Barua #endif
1412b2b77a04SHong Zhang       break;
1413b2b77a04SHong Zhang     case 2:
14145b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1415b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1416b2b77a04SHong Zhang       /*
1417b2b77a04SHong Zhang        PetscMPIInt    rank;
1418b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1419b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1420b2b77a04SHong Zhang        */
1421b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1422b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
14235b3e41ffSAmlan Barua #else
14245b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
14255b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1426c8a8a4f0SAmlan Barua //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1427c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1428c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
14295b3e41ffSAmlan Barua #endif
1430b2b77a04SHong Zhang       break;
1431b2b77a04SHong Zhang     case 3:
143251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
143351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
14346882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
14356882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
143651d1eed7SAmlan Barua #else
143751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
143851d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1439c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1440c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1441c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
144251d1eed7SAmlan Barua #endif
1443b2b77a04SHong Zhang       break;
1444b2b77a04SHong Zhang     default:
1445b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
144611902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
14476882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
14486882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1449b3a17365SAmlan Barua #else
1450b3a17365SAmlan Barua       temp = pdim[ndim-1];
1451b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1452b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1453b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1454b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1455b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1456c8a8a4f0SAmlan Barua //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1457c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*temp;
1458c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1459c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1460b3a17365SAmlan Barua #endif
1461b2b77a04SHong Zhang       break;
1462b2b77a04SHong Zhang     }
1463b2b77a04SHong Zhang   }
1464b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1465b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1466b2b77a04SHong Zhang   fft->data = (void*)fftw;
1467b2b77a04SHong Zhang 
1468b2b77a04SHong Zhang   fft->n           = n;
1469c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1470e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1471c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1472b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1473c92418ccSAmlan Barua 
1474b2b77a04SHong Zhang   fftw->p_forward  = 0;
1475b2b77a04SHong Zhang   fftw->p_backward = 0;
1476b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1477b2b77a04SHong Zhang 
1478b2b77a04SHong Zhang   if (size == 1){
1479b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1480b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1481b2b77a04SHong Zhang   } else {
1482b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1483b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1484b2b77a04SHong Zhang   }
1485b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
14866a506ed8SAmlan Barua   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
14874be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1488b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
14894be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
14903c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
14915b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1492b2b77a04SHong Zhang 
1493b2b77a04SHong Zhang   /* get runtime options */
1494b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1495b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1496b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1497b2b77a04SHong Zhang   PetscOptionsEnd();
1498b2b77a04SHong Zhang   PetscFunctionReturn(0);
1499b2b77a04SHong Zhang }
1500b2b77a04SHong Zhang EXTERN_C_END
15013c3a9c75SAmlan Barua 
15023c3a9c75SAmlan Barua 
15033c3a9c75SAmlan Barua 
15043c3a9c75SAmlan Barua 
1505