xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 4f48bc673f5b396d3f73c6d184d2e26ca0167c61)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC"
305c92418ccSAmlan Barua /*
3064be45526SHong Zhang     - Get Vectors(s) compatible with matrix, i.e. with the
307c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
308c92418ccSAmlan Barua 
309c92418ccSAmlan Barua */
3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311c92418ccSAmlan Barua {
312c92418ccSAmlan Barua   PetscErrorCode ierr;
313c92418ccSAmlan Barua   PetscMPIInt    size,rank;
314c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
315c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
316c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317c92418ccSAmlan Barua   PetscInt       N=fft->N;
318c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
321c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
323ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
324c92418ccSAmlan Barua 
325c92418ccSAmlan Barua   PetscFunctionBegin;
326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
327c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328c92418ccSAmlan Barua #endif
329c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331c92418ccSAmlan Barua   if (size == 1){
332c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333c92418ccSAmlan Barua   }
334c92418ccSAmlan Barua   else {
335c92418ccSAmlan Barua       if (ndim>1){
336c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337c92418ccSAmlan Barua       else {
338c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340c92418ccSAmlan Barua           if (fin) {
341c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344c92418ccSAmlan Barua           }
345c92418ccSAmlan Barua           if (fout) {
346c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349c92418ccSAmlan Barua           }
350c92418ccSAmlan Barua           if (bout) {
351c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354c92418ccSAmlan Barua           }
355c92418ccSAmlan Barua   }
356c92418ccSAmlan Barua   if (fin){
357c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358c92418ccSAmlan Barua   }
359c92418ccSAmlan Barua   if (fout){
360c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361c92418ccSAmlan Barua   }
362c92418ccSAmlan Barua   if (bout){
363c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364c92418ccSAmlan Barua   }
365c92418ccSAmlan Barua   PetscFunctionReturn(0);
366c92418ccSAmlan Barua }
367c92418ccSAmlan Barua 
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua }
3703c3a9c75SAmlan Barua 
3714f7415efSAmlan Barua #undef __FUNCT__
3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3734be45526SHong Zhang /*@
374b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3754f7415efSAmlan Barua      parallel layout determined by FFTW
3764f7415efSAmlan Barua 
3774f7415efSAmlan Barua    Collective on Mat
3784f7415efSAmlan Barua 
3794f7415efSAmlan Barua    Input Parameter:
3804f7415efSAmlan Barua .  mat - the matrix
3814f7415efSAmlan Barua 
3824f7415efSAmlan Barua    Output Parameter:
3834f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3844f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3854f7415efSAmlan Barua 
3864f7415efSAmlan Barua   Level: advanced
3874f7415efSAmlan Barua 
3884f7415efSAmlan Barua .seealso: MatCreateFFTW()
3894be45526SHong Zhang @*/
3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3914be45526SHong Zhang {
3924be45526SHong Zhang   PetscErrorCode ierr;
3934be45526SHong Zhang   PetscFunctionBegin;
3944be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3954be45526SHong Zhang   PetscFunctionReturn(0);
3964be45526SHong Zhang }
3974be45526SHong Zhang 
3984f7415efSAmlan Barua EXTERN_C_BEGIN
3994be45526SHong Zhang #undef __FUNCT__
4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4014be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4024f7415efSAmlan Barua {
4034f7415efSAmlan Barua   PetscErrorCode ierr;
4044f7415efSAmlan Barua   PetscMPIInt    size,rank;
4054f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
4064f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
4074f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4089496c9aeSAmlan Barua   PetscInt       N=fft->N;
4094f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
4104f7415efSAmlan Barua 
4114f7415efSAmlan Barua   PetscFunctionBegin;
4124f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4134f7415efSAmlan Barua   PetscValidType(A,1);
4144f7415efSAmlan Barua 
4154f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
4164f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4174f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4184f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4194f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4204f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4214f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4224f7415efSAmlan Barua #else
4239496c9aeSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
4244f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
4254f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
4264f7415efSAmlan Barua #endif
4274f7415efSAmlan Barua   } else {
4284f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4299496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4309496c9aeSAmlan Barua     fftw_complex   *data_fout;
4319496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4329496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4339496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4349496c9aeSAmlan Barua #else
4354f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4369496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4379496c9aeSAmlan Barua     ptrdiff_t      temp;
4389496c9aeSAmlan Barua #endif
4399496c9aeSAmlan Barua 
4404f7415efSAmlan Barua     switch (ndim){
4414f7415efSAmlan Barua           case 1:
44257625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
44357625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
44457625b84SAmlan Barua #else
4459496c9aeSAmlan Barua                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
44657625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44757625b84SAmlan Barua                  if (fin) {
44857625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
44957625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
45057625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
45157625b84SAmlan Barua                          }
45257625b84SAmlan Barua                  if (fout) {
45357625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45457625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
45557625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
45657625b84SAmlan Barua                          }
45757625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
45857625b84SAmlan Barua                  if (bout) {
45957625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
46057625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
46157625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
46257625b84SAmlan Barua                          }
46357625b84SAmlan Barua           break;
46457625b84SAmlan Barua #endif
4654f7415efSAmlan Barua           case 2:
4664f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4674f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4684f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4694f7415efSAmlan Barua                  if (fin) {
4704f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4714f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4724f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4734f7415efSAmlan Barua                           }
4744f7415efSAmlan Barua                  if (bout) {
4754f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4764f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4774f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4784f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4794f7415efSAmlan Barua                           }
480c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
48157625b84SAmlan Barua                  if (fout) {
48257625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4839496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48457625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
48557625b84SAmlan Barua                            }
4864f7415efSAmlan Barua #else
4874f7415efSAmlan Barua       /* Get local size */
4884f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4894f7415efSAmlan Barua                  if (fin) {
4904f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4914f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4924f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4934f7415efSAmlan Barua                           }
4944f7415efSAmlan Barua                  if (fout) {
4954f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4964f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4974f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4984f7415efSAmlan Barua                            }
4994f7415efSAmlan Barua                  if (bout) {
5004f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5014f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5024f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5034f7415efSAmlan Barua                           }
5044f7415efSAmlan Barua #endif
5054f7415efSAmlan Barua           break;
5064f7415efSAmlan Barua           case 3:
5074f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5084f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5094f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5104f7415efSAmlan Barua                  if (fin) {
5114f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5124f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5134f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5144f7415efSAmlan Barua                          }
5154f7415efSAmlan Barua                  if (bout) {
5164f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5174f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5184f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5194f7415efSAmlan Barua                           }
520c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
52157625b84SAmlan Barua                  if (fout) {
52257625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52357625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52457625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52557625b84SAmlan Barua                           }
5264f7415efSAmlan Barua #else
5270c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5280c9b18e4SAmlan Barua                  if (fin) {
5290c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5300c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5310c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5320c9b18e4SAmlan Barua                          }
5330c9b18e4SAmlan Barua                  if (fout) {
5340c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5350c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5360c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5370c9b18e4SAmlan Barua                          }
5380c9b18e4SAmlan Barua                  if (bout) {
5390c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5400c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5410c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5420c9b18e4SAmlan Barua                          }
5434f7415efSAmlan Barua #endif
5444f7415efSAmlan Barua           break;
5454f7415efSAmlan Barua           default:
5464f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5474f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5484f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5494f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5504f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5514f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5524f7415efSAmlan Barua 
5534f7415efSAmlan Barua                  if (fin) {
5544f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5554f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5564f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5574f7415efSAmlan Barua                         }
5584f7415efSAmlan Barua                  if (bout) {
5594f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5604f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5619496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5624f7415efSAmlan Barua                         }
563c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
564c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
565c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
56657625b84SAmlan Barua                  if (fout) {
56757625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
56857625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
56957625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57057625b84SAmlan Barua                         }
5714f7415efSAmlan Barua #else
5720c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5730c9b18e4SAmlan Barua                 if (fin) {
5740c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5750c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5760c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5770c9b18e4SAmlan Barua                        }
5780c9b18e4SAmlan Barua                 if (fout) {
5790c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5800c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5810c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5820c9b18e4SAmlan Barua                        }
5830c9b18e4SAmlan Barua                 if (bout) {
5840c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5850c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5860c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5870c9b18e4SAmlan Barua                        }
5884f7415efSAmlan Barua #endif
5894f7415efSAmlan Barua             break;
5904f7415efSAmlan Barua           }
5914f7415efSAmlan Barua   }
5924f7415efSAmlan Barua   PetscFunctionReturn(0);
5934f7415efSAmlan Barua }
5944f7415efSAmlan Barua EXTERN_C_END
5954f7415efSAmlan Barua 
596c92418ccSAmlan Barua #undef __FUNCT__
597b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
598b2b77a04SHong Zhang /*
599b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
600b2b77a04SHong Zhang      parallel layout determined by FFTW
601b2b77a04SHong Zhang 
602b2b77a04SHong Zhang    Collective on Mat
603b2b77a04SHong Zhang 
604b2b77a04SHong Zhang    Input Parameter:
605b2b77a04SHong Zhang .  mat - the matrix
606b2b77a04SHong Zhang 
607b2b77a04SHong Zhang    Output Parameter:
608b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
609b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
610b2b77a04SHong Zhang 
611b2b77a04SHong Zhang   Level: advanced
612b2b77a04SHong Zhang 
613b2b77a04SHong Zhang .seealso: MatCreateFFTW()
614b2b77a04SHong Zhang */
615b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
616b2b77a04SHong Zhang {
617b2b77a04SHong Zhang   PetscErrorCode ierr;
618b2b77a04SHong Zhang   PetscMPIInt    size,rank;
619b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
620b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
621c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6229496c9aeSAmlan Barua   PetscInt       N=fft->N;
623e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
624b2b77a04SHong Zhang 
625b2b77a04SHong Zhang   PetscFunctionBegin;
626b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
627b2b77a04SHong Zhang   PetscValidType(A,1);
628b2b77a04SHong Zhang 
629b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
630b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
631b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
632e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
633b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
634b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
635e5d7f247SAmlan Barua #else
636e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
637e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
638e81bb599SAmlan Barua #endif
639b2b77a04SHong Zhang   } else {        /* mpi case */
640b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6416882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
642b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
6439496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
64451d1eed7SAmlan Barua     double         *data_finr ;
645b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
6469496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
6479496c9aeSAmlan Barua #endif
6489496c9aeSAmlan Barua 
649c92418ccSAmlan Barua //    PetscInt ctr;
650c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
651c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
652c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
65311902ff2SHong Zhang 
654c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
655f76f76beSAmlan Barua //        {k
656c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
657c92418ccSAmlan Barua //       }
658b2b77a04SHong Zhang 
659f76f76beSAmlan Barua 
660f76f76beSAmlan Barua 
661b2b77a04SHong Zhang     switch (ndim){
662b2b77a04SHong Zhang     case 1:
6636882372aSHong Zhang       /* Get local size */
664e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
665c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6666882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6679496c9aeSAmlan Barua       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
6686882372aSHong Zhang       if (fin) {
6696882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6706882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6716882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6726882372aSHong Zhang       }
6736882372aSHong Zhang       if (fout) {
6746882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
67557625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6766882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6776882372aSHong Zhang       }
678b2b77a04SHong Zhang       break;
679b2b77a04SHong Zhang     case 2:
6803c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6813c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6823c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6833c3a9c75SAmlan Barua       if (fin) {
6843c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
68554dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6863c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
68709dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6883c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6893c3a9c75SAmlan Barua       }
69057625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
69157625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
69257625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
6933c3a9c75SAmlan Barua       if (fout) {
69409dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
69509dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6963c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6973c3a9c75SAmlan Barua       }
698f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
6993c3a9c75SAmlan Barua 
7003c3a9c75SAmlan Barua #else
701b2b77a04SHong Zhang       /* Get local size */
70264657d84SAmlan Barua      //printf("Hope this does not come here");
703b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
704b2b77a04SHong Zhang       if (fin) {
705b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
706b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
707b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
708b2b77a04SHong Zhang       }
709b2b77a04SHong Zhang       if (fout) {
710b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
711b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
712b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
713b2b77a04SHong Zhang       }
71464657d84SAmlan Barua      //printf("Hope this does not come here");
7153c3a9c75SAmlan Barua #endif
716b2b77a04SHong Zhang       break;
717b2b77a04SHong Zhang     case 3:
7186882372aSHong Zhang       /* Get local size */
7193c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
72051d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
72151d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
72251d1eed7SAmlan Barua       if (fin) {
72351d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
72451d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
72551d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
72651d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
72751d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
72851d1eed7SAmlan Barua       }
72957625b84SAmlan Barua       printf("The value is %ld",local_n1);
73057625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
73151d1eed7SAmlan Barua       if (fout) {
73251d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
73351d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
73451d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
73551d1eed7SAmlan Barua       }
73651d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
73751d1eed7SAmlan Barua 
73851d1eed7SAmlan Barua 
7393c3a9c75SAmlan Barua #else
7406882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
74111902ff2SHong Zhang //      printf("The quantity n is %d",n);
7426882372aSHong Zhang       if (fin) {
7436882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7446882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7456882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7466882372aSHong Zhang       }
7476882372aSHong Zhang       if (fout) {
7486882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7496882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7506882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7516882372aSHong Zhang       }
7523c3a9c75SAmlan Barua #endif
753b2b77a04SHong Zhang       break;
754b2b77a04SHong Zhang     default:
7556882372aSHong Zhang       /* Get local size */
7563c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
757b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
758b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
759b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
760b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
761b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
762b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
763b3a17365SAmlan Barua       if (fin) {
764b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
765b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
766b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
767b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
768b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
769b3a17365SAmlan Barua       }
77057625b84SAmlan Barua       printf("The value is %ld. Global length is %d \n",local_n1,N1);
77157625b84SAmlan Barua       temp = fftw->partial_dim;
77257625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
77357625b84SAmlan Barua 
77457625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
775b3a17365SAmlan Barua       if (fout) {
776b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
77757625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
778b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
779b3a17365SAmlan Barua       }
780b3a17365SAmlan Barua 
7813c3a9c75SAmlan Barua #else
782c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
78311902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
78411902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
78511902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
78611902ff2SHong Zhang //      for(i=0;i<ndim;i++)
78711902ff2SHong Zhang //         {
78811902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
78911902ff2SHong Zhang //         }
79011902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
79111902ff2SHong Zhang //      printf("The quantity n is %d",n);
7926882372aSHong Zhang       if (fin) {
7936882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7946882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7956882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7966882372aSHong Zhang       }
7976882372aSHong Zhang       if (fout) {
7986882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7996882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
8006882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
8016882372aSHong Zhang       }
8023c3a9c75SAmlan Barua #endif
803b2b77a04SHong Zhang       break;
804b2b77a04SHong Zhang     }
805b2b77a04SHong Zhang   }
80654dd5118SAmlan Barua //  if (fin){
80754dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
80854dd5118SAmlan Barua //  }
80954dd5118SAmlan Barua //  if (fout){
81054dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
81154dd5118SAmlan Barua //  }
812b2b77a04SHong Zhang   PetscFunctionReturn(0);
813b2b77a04SHong Zhang }
814b2b77a04SHong Zhang 
815b2b77a04SHong Zhang #undef __FUNCT__
8163c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
8173c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
8183c3a9c75SAmlan Barua {
8193c3a9c75SAmlan Barua   PetscErrorCode ierr;
8203c3a9c75SAmlan Barua   PetscFunctionBegin;
8213c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8223c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8233c3a9c75SAmlan Barua }
82454efbe56SHong Zhang 
825b2b77a04SHong Zhang /*
8269496c9aeSAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
8279496c9aeSAmlan Barua       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
8289496c9aeSAmlan Barua       changing dimension.
8293c3a9c75SAmlan Barua   Input A, x, y
8303c3a9c75SAmlan Barua   A - FFTW matrix
83154dd5118SAmlan Barua   x - user data
832b2b77a04SHong Zhang   Options Database Keys:
833b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
834b2b77a04SHong Zhang 
835b2b77a04SHong Zhang    Level: intermediate
836b2b77a04SHong Zhang 
837b2b77a04SHong Zhang */
8383c3a9c75SAmlan Barua 
8393c3a9c75SAmlan Barua EXTERN_C_BEGIN
8403c3a9c75SAmlan Barua #undef __FUNCT__
8413c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
8423c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
8433c3a9c75SAmlan Barua {
8443c3a9c75SAmlan Barua   PetscErrorCode ierr;
8453c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8463c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8473c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8489496c9aeSAmlan Barua   PetscInt       N=fft->N;
849b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
8509496c9aeSAmlan Barua   PetscInt       low;
8519496c9aeSAmlan Barua   PetscInt       rank,size;
8523c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8539496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8543c3a9c75SAmlan Barua   VecScatter     vecscat;
8553c3a9c75SAmlan Barua   IS             list1,list2;
8569496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8579496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8589496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8599496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8609496c9aeSAmlan Barua   ptrdiff_t      temp;
8619496c9aeSAmlan Barua #endif
8623c3a9c75SAmlan Barua 
863f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
864f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8653c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
8663c3a9c75SAmlan Barua 
867e81bb599SAmlan Barua   if (size==1)
868e81bb599SAmlan Barua     {
8697536937bSAmlan Barua /*     switch (ndim){
870e81bb599SAmlan Barua      case 1:
871e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
872e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
873e81bb599SAmlan Barua              {
874e81bb599SAmlan Barua               indx1[i] = i;
875e81bb599SAmlan Barua              }
876e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
877e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
878e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
881b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
882b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
883e81bb599SAmlan Barua      break;
884e81bb599SAmlan Barua 
885e81bb599SAmlan Barua      case 2:
886e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
887e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
888e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
889e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
890e81bb599SAmlan Barua              }
891e81bb599SAmlan Barua           }
892e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
893e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
894e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
897b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
898b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
899e81bb599SAmlan Barua           break;
900e81bb599SAmlan Barua      case 3:
901e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
902e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
903e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
904e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
905e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
906e81bb599SAmlan Barua                 }
907e81bb599SAmlan Barua              }
908e81bb599SAmlan Barua           }
909e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
910e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
911e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
912e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
914b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
915b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
916e81bb599SAmlan Barua           break;
917e81bb599SAmlan Barua      default:
9187536937bSAmlan Barua */
9199496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
9206971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9216971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9226971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9236971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
924b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
9257536937bSAmlan Barua  //         break;
9267536937bSAmlan Barua   //    }
927e81bb599SAmlan Barua     }
928e81bb599SAmlan Barua 
929e81bb599SAmlan Barua  else{
930e81bb599SAmlan Barua 
9313c3a9c75SAmlan Barua  switch (ndim){
9323c3a9c75SAmlan Barua  case 1:
93364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
93464657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
93564657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
93664657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
93764657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
93864657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
93964657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94064657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
94164657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
94264657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
94364657d84SAmlan Barua #else
944e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
94564657d84SAmlan Barua #endif
9463c3a9c75SAmlan Barua  break;
9473c3a9c75SAmlan Barua  case 2:
948bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
949bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
950bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
951bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
952bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
953bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
956bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
957bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
958bd59e6c6SAmlan Barua #else
9593c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9603c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9619496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9629496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9639496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9649496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9653c3a9c75SAmlan Barua 
9663c3a9c75SAmlan Barua       if (dim[1]%2==0)
9673c3a9c75SAmlan Barua         NM = dim[1]+2;
9683c3a9c75SAmlan Barua       else
9693c3a9c75SAmlan Barua         NM = dim[1]+1;
9703c3a9c75SAmlan Barua 
9713c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9723c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9733c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9743c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9755b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9763c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9773c3a9c75SAmlan Barua         }
9783c3a9c75SAmlan Barua      }
9793c3a9c75SAmlan Barua 
9803c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9813c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9823c3a9c75SAmlan Barua 
983f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
984f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
987b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
988b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
989b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
990b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
991bd59e6c6SAmlan Barua #endif
9929496c9aeSAmlan Barua  break;
9933c3a9c75SAmlan Barua 
9943c3a9c75SAmlan Barua  case 3:
995bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
996bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
997bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
998bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
999bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1000bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1001bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1002bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1005bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1006bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1007bd59e6c6SAmlan Barua #else
100851d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
100951d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
101051d1eed7SAmlan Barua 
10119496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
10129496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
10139496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10149496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
101551d1eed7SAmlan Barua 
101651d1eed7SAmlan Barua       if (dim[2]%2==0)
101751d1eed7SAmlan Barua         NM = dim[2]+2;
101851d1eed7SAmlan Barua       else
101951d1eed7SAmlan Barua         NM = dim[2]+1;
102051d1eed7SAmlan Barua 
102151d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
102251d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
102351d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
102451d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
102551d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
102651d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
102751d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
102851d1eed7SAmlan Barua             }
102951d1eed7SAmlan Barua          }
103051d1eed7SAmlan Barua       }
103151d1eed7SAmlan Barua 
103251d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
103351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
103451d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
103551d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103651d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103751d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1038b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1039b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1040b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1041b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1042bd59e6c6SAmlan Barua #endif
10439496c9aeSAmlan Barua  break;
10443c3a9c75SAmlan Barua 
10453c3a9c75SAmlan Barua  default:
1046bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1047bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1048bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1049bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1050bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1051bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1054bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1055bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua #else
1057e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1058e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1059e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1060e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1061e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1062e5d7f247SAmlan Barua 
1063e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1064e5d7f247SAmlan Barua 
1065e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1066e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1067e5d7f247SAmlan Barua 
1068e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1069ba6e06d0SAmlan Barua         NM = 2;
1070e5d7f247SAmlan Barua       else
1071ba6e06d0SAmlan Barua         NM = 1;
1072e5d7f247SAmlan Barua 
10736971673cSAmlan Barua       j = low;
10746971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
10756971673cSAmlan Barua          {
10766971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
10776971673cSAmlan Barua           indx2[i] = j;
10786971673cSAmlan Barua           if (k%dim[ndim-1]==0)
10796971673cSAmlan Barua             { j+=NM;}
10806971673cSAmlan Barua           j++;
10816971673cSAmlan Barua          }
10826971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10836971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10846971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
10856971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10866971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10876971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1088b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1089b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1090b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1091b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1092bd59e6c6SAmlan Barua #endif
10939496c9aeSAmlan Barua  break;
10943c3a9c75SAmlan Barua   }
1095e81bb599SAmlan Barua  }
10963c3a9c75SAmlan Barua 
10973c3a9c75SAmlan Barua  return 0;
10983c3a9c75SAmlan Barua }
10993c3a9c75SAmlan Barua EXTERN_C_END
11003c3a9c75SAmlan Barua 
11013c3a9c75SAmlan Barua #undef __FUNCT__
11023c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
11033c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
11043c3a9c75SAmlan Barua {
11053c3a9c75SAmlan Barua   PetscErrorCode ierr;
11063c3a9c75SAmlan Barua   PetscFunctionBegin;
11073c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
11083c3a9c75SAmlan Barua   PetscFunctionReturn(0);
11093c3a9c75SAmlan Barua }
111054efbe56SHong Zhang 
11115b3e41ffSAmlan Barua /*
11125b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
11135b3e41ffSAmlan Barua   Input A, x, y
11145b3e41ffSAmlan Barua   A - FFTW matrix
11155b3e41ffSAmlan Barua   x - FFTW vector
11165b3e41ffSAmlan Barua   y - PETSc vector that user can read
11175b3e41ffSAmlan Barua   Options Database Keys:
11185b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11195b3e41ffSAmlan Barua 
11205b3e41ffSAmlan Barua    Level: intermediate
11215b3e41ffSAmlan Barua 
11223c3a9c75SAmlan Barua */
11233c3a9c75SAmlan Barua 
11243c3a9c75SAmlan Barua EXTERN_C_BEGIN
11253c3a9c75SAmlan Barua #undef __FUNCT__
11265b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
11275b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
11285b3e41ffSAmlan Barua {
11295b3e41ffSAmlan Barua   PetscErrorCode ierr;
11305b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
11315b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
11325b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
11339496c9aeSAmlan Barua   PetscInt       N=fft->N;
1134b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
11359496c9aeSAmlan Barua   PetscInt       low;
11369496c9aeSAmlan Barua   PetscInt       rank,size;
11375b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
11389496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11395b3e41ffSAmlan Barua   VecScatter     vecscat;
11405b3e41ffSAmlan Barua   IS             list1,list2;
11419496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11429496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
11439496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
11449496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
11459496c9aeSAmlan Barua   ptrdiff_t      temp;
11469496c9aeSAmlan Barua #endif
11479496c9aeSAmlan Barua 
11485b3e41ffSAmlan Barua 
11495b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
11505b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1151b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
11525b3e41ffSAmlan Barua 
1153e81bb599SAmlan Barua   if (size==1){
11547536937bSAmlan Barua /*
11555b3e41ffSAmlan Barua     switch (ndim){
11565b3e41ffSAmlan Barua     case 1:
1157e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1158e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1159e81bb599SAmlan Barua              {
1160e81bb599SAmlan Barua               indx1[i] = i;
1161e81bb599SAmlan Barua              }
1162e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1163e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1164e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1167b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1168b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1169e81bb599SAmlan Barua           break;
1170e81bb599SAmlan Barua 
1171e81bb599SAmlan Barua     case 2:
1172e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1173e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1174e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1175e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1176e81bb599SAmlan Barua              }
1177e81bb599SAmlan Barua           }
1178e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1179e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1180e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1181e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1183b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1184b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1185e81bb599SAmlan Barua          break;
1186e81bb599SAmlan Barua     case 3:
1187e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1188e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1189e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1190e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1191e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1192e81bb599SAmlan Barua                }
1193e81bb599SAmlan Barua             }
1194e81bb599SAmlan Barua          }
1195e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1196e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1197e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1198e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1200b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1201b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1202e81bb599SAmlan Barua          break;
1203e81bb599SAmlan Barua     default:
12047536937bSAmlan Barua */
1205b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
12066971673cSAmlan Barua     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
12076971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
12086971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12096971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12106971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1211b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
12127536937bSAmlan Barua   //       break;
12137536937bSAmlan Barua    // }
1214e81bb599SAmlan Barua   }
1215e81bb599SAmlan Barua   else{
1216e81bb599SAmlan Barua 
1217e81bb599SAmlan Barua  switch (ndim){
1218e81bb599SAmlan Barua  case 1:
121964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
122064657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
12219496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
12229496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
122364657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
122464657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
122564657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
122664657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122764657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122864657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
122964657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
123064657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
123164657d84SAmlan Barua #else
12326a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
123364657d84SAmlan Barua #endif
12345b3e41ffSAmlan Barua  break;
12355b3e41ffSAmlan Barua  case 2:
1236bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1237bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1238bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1239bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1240bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1241bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1242bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1244bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1245bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1246bd59e6c6SAmlan Barua #else
12475b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
12485b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
12495b3e41ffSAmlan Barua 
12509496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
12519496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
12525b3e41ffSAmlan Barua 
12535b3e41ffSAmlan Barua       if (dim[1]%2==0)
12545b3e41ffSAmlan Barua         NM = dim[1]+2;
12555b3e41ffSAmlan Barua       else
12565b3e41ffSAmlan Barua         NM = dim[1]+1;
12575b3e41ffSAmlan Barua 
12585b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
12595b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
12605b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12615b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12625b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
12635b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
12645b3e41ffSAmlan Barua          }
12655b3e41ffSAmlan Barua        }
12665b3e41ffSAmlan Barua 
12675b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
12685b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
12695b3e41ffSAmlan Barua 
12705b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
12715b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12725b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12735b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1274b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1275b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1276b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1277b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1278bd59e6c6SAmlan Barua #endif
12799496c9aeSAmlan Barua  break;
12805b3e41ffSAmlan Barua  case 3:
1281bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1282bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1283bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1284bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1285bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1286bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1287bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1288bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1289bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1290bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1291bd59e6c6SAmlan Barua #else
1292bd59e6c6SAmlan Barua 
129351d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
129451d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
129551d1eed7SAmlan Barua 
12969496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
12979496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
12989496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
12999496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
130051d1eed7SAmlan Barua 
130151d1eed7SAmlan Barua        if (dim[2]%2==0)
130251d1eed7SAmlan Barua         NM = dim[2]+2;
130351d1eed7SAmlan Barua        else
130451d1eed7SAmlan Barua         NM = dim[2]+1;
130551d1eed7SAmlan Barua 
130651d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
130751d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
130851d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
130951d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
131051d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
131151d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
131251d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
131351d1eed7SAmlan Barua              }
131451d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
131551d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
131651d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
131751d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
131851d1eed7SAmlan Barua           }
131951d1eed7SAmlan Barua        }
132051d1eed7SAmlan Barua 
132151d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
132251d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
132351d1eed7SAmlan Barua 
132451d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
132551d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
132651d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
132751d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1328b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1329b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1330b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1331b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1332bd59e6c6SAmlan Barua #endif
13339496c9aeSAmlan Barua  break;
13345b3e41ffSAmlan Barua  default:
1335bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1336bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1337bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1338bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1339bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1340bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1341bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1342bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1343bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1344bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1345bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1346bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1347bd59e6c6SAmlan Barua #else
1348ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1349ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1350ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1351ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1352ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1353ba6e06d0SAmlan Barua 
1354ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1355ba6e06d0SAmlan Barua 
1356ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1357ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1358ba6e06d0SAmlan Barua 
1359ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1360ba6e06d0SAmlan Barua          NM = 2;
1361ba6e06d0SAmlan Barua        else
1362ba6e06d0SAmlan Barua          NM = 1;
1363ba6e06d0SAmlan Barua 
1364ba6e06d0SAmlan Barua        j = low;
1365ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1366ba6e06d0SAmlan Barua           {
1367ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1368ba6e06d0SAmlan Barua            indx2[i] = j;
1369ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1370ba6e06d0SAmlan Barua              { j+=NM;}
1371ba6e06d0SAmlan Barua            j++;
1372ba6e06d0SAmlan Barua           }
1373ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1374ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1375ba6e06d0SAmlan Barua 
1376ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1377ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1380b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1381b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1382b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1383b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1384bd59e6c6SAmlan Barua #endif
13859496c9aeSAmlan Barua  break;
13865b3e41ffSAmlan Barua  }
1387e81bb599SAmlan Barua  }
13885b3e41ffSAmlan Barua  return 0;
13895b3e41ffSAmlan Barua }
13905b3e41ffSAmlan Barua EXTERN_C_END
13915b3e41ffSAmlan Barua 
13925b3e41ffSAmlan Barua EXTERN_C_BEGIN
13935b3e41ffSAmlan Barua #undef __FUNCT__
13943c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
13953c3a9c75SAmlan Barua /*
13963c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
13973c3a9c75SAmlan Barua   via the external package FFTW
13983c3a9c75SAmlan Barua   Options Database Keys:
13993c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
14003c3a9c75SAmlan Barua 
14013c3a9c75SAmlan Barua    Level: intermediate
14023c3a9c75SAmlan Barua 
14033c3a9c75SAmlan Barua */
14043c3a9c75SAmlan Barua 
1405b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1406b2b77a04SHong Zhang {
1407b2b77a04SHong Zhang   PetscErrorCode ierr;
1408b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1409b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1410b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1411b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1412b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1413b2b77a04SHong Zhang   PetscBool      flg;
1414*4f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
141511902ff2SHong Zhang   PetscMPIInt    size,rank;
14169496c9aeSAmlan Barua   ptrdiff_t      *pdim;
14179496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
14189496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14199496c9aeSAmlan Barua    ptrdiff_t     temp;
14209496c9aeSAmlan Barua    PetscInt      N1;
1421*4f48bc67SAmlan Barua #else
1422*4f48bc67SAmlan Barua    PetscInt n1;
14239496c9aeSAmlan Barua #endif
14249496c9aeSAmlan Barua 
1425b2b77a04SHong Zhang 
1426b2b77a04SHong Zhang   PetscFunctionBegin;
1427b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
142811902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
142954dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1430c92418ccSAmlan Barua 
143111902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
143211902ff2SHong Zhang   pdim[0] = dim[0];
143311902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
143411902ff2SHong Zhang      {
14356882372aSHong Zhang           partial_dim *= dim[ctr];
143611902ff2SHong Zhang           pdim[ctr] = dim[ctr];
14376882372aSHong Zhang      }
14383c3a9c75SAmlan Barua 
1439b2b77a04SHong Zhang   if (size == 1) {
1440e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1441b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1442b2b77a04SHong Zhang     n = N;
1443e81bb599SAmlan Barua #else
14449496c9aeSAmlan Barua     PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1445e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1446e81bb599SAmlan Barua     n = tot_dim;
1447e81bb599SAmlan Barua #endif
1448e81bb599SAmlan Barua 
1449b2b77a04SHong Zhang   } else {
14509496c9aeSAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1451b2b77a04SHong Zhang     switch (ndim){
1452b2b77a04SHong Zhang     case 1:
14533c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14543c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1455e5d7f247SAmlan Barua #else
14569496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
14576882372aSHong Zhang       n = (PetscInt)local_n0;
14589496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
14599496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1460e5d7f247SAmlan Barua #endif
1461b2b77a04SHong Zhang       break;
1462b2b77a04SHong Zhang     case 2:
14635b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1464b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1465b2b77a04SHong Zhang       /*
1466b2b77a04SHong Zhang        PetscMPIInt    rank;
1467b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1468b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1469b2b77a04SHong Zhang        */
1470b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1471b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
14725b3e41ffSAmlan Barua #else
14735b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
14745b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1475c8a8a4f0SAmlan Barua //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1476c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1477c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
14785b3e41ffSAmlan Barua #endif
1479b2b77a04SHong Zhang       break;
1480b2b77a04SHong Zhang     case 3:
148151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148251d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
14836882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
14846882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
148551d1eed7SAmlan Barua #else
148651d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
148751d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1488c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1489c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1490c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
149151d1eed7SAmlan Barua #endif
1492b2b77a04SHong Zhang       break;
1493b2b77a04SHong Zhang     default:
1494b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
149511902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
14966882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
14976882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1498b3a17365SAmlan Barua #else
1499b3a17365SAmlan Barua       temp = pdim[ndim-1];
1500b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1501b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1502b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1503b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1504b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1505c8a8a4f0SAmlan Barua //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1506c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*temp;
1507c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1508c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1509b3a17365SAmlan Barua #endif
1510b2b77a04SHong Zhang       break;
1511b2b77a04SHong Zhang     }
1512b2b77a04SHong Zhang   }
1513b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1514b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1515b2b77a04SHong Zhang   fft->data = (void*)fftw;
1516b2b77a04SHong Zhang 
1517b2b77a04SHong Zhang   fft->n           = n;
1518c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1519e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1520c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1521b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1522c92418ccSAmlan Barua 
1523b2b77a04SHong Zhang   fftw->p_forward  = 0;
1524b2b77a04SHong Zhang   fftw->p_backward = 0;
1525b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1526b2b77a04SHong Zhang 
1527b2b77a04SHong Zhang   if (size == 1){
1528b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1529b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1530b2b77a04SHong Zhang   } else {
1531b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1532b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1533b2b77a04SHong Zhang   }
1534b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
15356a506ed8SAmlan Barua   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
15364be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1537b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
15384be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
15393c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
15405b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1541b2b77a04SHong Zhang 
1542b2b77a04SHong Zhang   /* get runtime options */
1543b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1544b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1545b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1546b2b77a04SHong Zhang   PetscOptionsEnd();
1547b2b77a04SHong Zhang   PetscFunctionReturn(0);
1548b2b77a04SHong Zhang }
1549b2b77a04SHong Zhang EXTERN_C_END
15503c3a9c75SAmlan Barua 
15513c3a9c75SAmlan Barua 
15523c3a9c75SAmlan Barua 
15533c3a9c75SAmlan Barua 
1554