xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision c8a8a4f0d4d51c20aa00b6c6fd97dc1f439daf51)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
43b2b77a04SHong Zhang     switch (ndim){
44b2b77a04SHong Zhang     case 1:
4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
4758a912c5SAmlan Barua #else
4858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
4958a912c5SAmlan Barua #endif
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
53b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5458a912c5SAmlan Barua #else
5558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5658a912c5SAmlan Barua #endif
57b2b77a04SHong Zhang       break;
58b2b77a04SHong Zhang     case 3:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     default:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     }
73b2b77a04SHong Zhang     fftw->finarray  = x_array;
74b2b77a04SHong Zhang     fftw->foutarray = y_array;
75b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
78b2b77a04SHong Zhang   } else { /* use existing plan */
79b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81b2b77a04SHong Zhang     } else {
82b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
83b2b77a04SHong Zhang     }
84b2b77a04SHong Zhang   }
85b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87b2b77a04SHong Zhang   PetscFunctionReturn(0);
88b2b77a04SHong Zhang }
89b2b77a04SHong Zhang 
90b2b77a04SHong Zhang #undef __FUNCT__
91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93b2b77a04SHong Zhang {
94b2b77a04SHong Zhang   PetscErrorCode ierr;
95b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
96b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
98b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99b2b77a04SHong Zhang 
100b2b77a04SHong Zhang   PetscFunctionBegin;
101b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
104b2b77a04SHong Zhang     switch (ndim){
105b2b77a04SHong Zhang     case 1:
10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
107b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
10858a912c5SAmlan Barua #else
109e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11058a912c5SAmlan Barua #endif
111b2b77a04SHong Zhang       break;
112b2b77a04SHong Zhang     case 2:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 3:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     default:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     }
134b2b77a04SHong Zhang     fftw->binarray  = x_array;
135b2b77a04SHong Zhang     fftw->boutarray = y_array;
136b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137b2b77a04SHong Zhang   } else { /* use existing plan */
138b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140b2b77a04SHong Zhang     } else {
141b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142b2b77a04SHong Zhang     }
143b2b77a04SHong Zhang   }
144b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146b2b77a04SHong Zhang   PetscFunctionReturn(0);
147b2b77a04SHong Zhang }
148b2b77a04SHong Zhang 
149b2b77a04SHong Zhang #undef __FUNCT__
150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   PetscErrorCode ierr;
154b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
157c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim){
165b2b77a04SHong Zhang     case 1:
1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168ae0a50aaSHong Zhang #else
169ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
1703c3a9c75SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1753c3a9c75SAmlan Barua #else
1763c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1773c3a9c75SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1823c3a9c75SAmlan Barua #else
18351d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1843c3a9c75SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1893c3a9c75SAmlan Barua #else
1903c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1913c3a9c75SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194b2b77a04SHong Zhang     fftw->finarray  = x_array;
195b2b77a04SHong Zhang     fftw->foutarray = y_array;
196b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
199b2b77a04SHong Zhang   } else { /* use existing plan */
200b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202b2b77a04SHong Zhang     } else {
203b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
204b2b77a04SHong Zhang     }
205b2b77a04SHong Zhang   }
206b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208b2b77a04SHong Zhang   PetscFunctionReturn(0);
209b2b77a04SHong Zhang }
210b2b77a04SHong Zhang 
211b2b77a04SHong Zhang #undef __FUNCT__
212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214b2b77a04SHong Zhang {
215b2b77a04SHong Zhang   PetscErrorCode ierr;
216b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
219c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
221b2b77a04SHong Zhang 
222b2b77a04SHong Zhang   PetscFunctionBegin;
223b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
226b2b77a04SHong Zhang     switch (ndim){
227b2b77a04SHong Zhang     case 1:
2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
229b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230ae0a50aaSHong Zhang #else
231ae0a50aaSHong Zhang       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
2323c3a9c75SAmlan Barua #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 2:
2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2373c3a9c75SAmlan Barua #else
2383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     case 3:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2443c3a9c75SAmlan Barua #else
2453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     default:
2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
250c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     }
256b2b77a04SHong Zhang     fftw->binarray  = x_array;
257b2b77a04SHong Zhang     fftw->boutarray = y_array;
258b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259b2b77a04SHong Zhang   } else { /* use existing plan */
260b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262b2b77a04SHong Zhang     } else {
263b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang     }
265b2b77a04SHong Zhang   }
266b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268b2b77a04SHong Zhang   PetscFunctionReturn(0);
269b2b77a04SHong Zhang }
270b2b77a04SHong Zhang 
271b2b77a04SHong Zhang #undef __FUNCT__
272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
274b2b77a04SHong Zhang {
275b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
276b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277b2b77a04SHong Zhang   PetscErrorCode ierr;
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   PetscFunctionBegin;
280b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
281b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
282b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288b2b77a04SHong Zhang #undef __FUNCT__
289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291b2b77a04SHong Zhang {
292b2b77a04SHong Zhang   PetscErrorCode  ierr;
293b2b77a04SHong Zhang   PetscScalar     *array;
294b2b77a04SHong Zhang 
295b2b77a04SHong Zhang   PetscFunctionBegin;
296b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300b2b77a04SHong Zhang   PetscFunctionReturn(0);
301b2b77a04SHong Zhang }
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang #undef __FUNCT__
3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC"
305c92418ccSAmlan Barua /*
3064be45526SHong Zhang     - Get Vectors(s) compatible with matrix, i.e. with the
307c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
308c92418ccSAmlan Barua 
309c92418ccSAmlan Barua */
3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311c92418ccSAmlan Barua {
312c92418ccSAmlan Barua   PetscErrorCode ierr;
313c92418ccSAmlan Barua   PetscMPIInt    size,rank;
314c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
315c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
316c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317c92418ccSAmlan Barua   PetscInt       N=fft->N;
318c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
321c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
323ae0a50aaSHong Zhang   fftw_complex   *data_fin,*data_fout,*data_bout;
324c92418ccSAmlan Barua 
325c92418ccSAmlan Barua   PetscFunctionBegin;
326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
327c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328c92418ccSAmlan Barua #endif
329c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331c92418ccSAmlan Barua   if (size == 1){
332c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333c92418ccSAmlan Barua   }
334c92418ccSAmlan Barua   else {
335c92418ccSAmlan Barua       if (ndim>1){
336c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337c92418ccSAmlan Barua       else {
338c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340c92418ccSAmlan Barua           if (fin) {
341c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344c92418ccSAmlan Barua           }
345c92418ccSAmlan Barua           if (fout) {
346c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349c92418ccSAmlan Barua           }
350c92418ccSAmlan Barua           if (bout) {
351c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354c92418ccSAmlan Barua           }
355c92418ccSAmlan Barua   }
356c92418ccSAmlan Barua   if (fin){
357c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358c92418ccSAmlan Barua   }
359c92418ccSAmlan Barua   if (fout){
360c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361c92418ccSAmlan Barua   }
362c92418ccSAmlan Barua   if (bout){
363c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364c92418ccSAmlan Barua   }
365c92418ccSAmlan Barua   PetscFunctionReturn(0);
366c92418ccSAmlan Barua }
367c92418ccSAmlan Barua 
368c92418ccSAmlan Barua 
369c92418ccSAmlan Barua }
3703c3a9c75SAmlan Barua 
3714f7415efSAmlan Barua #undef __FUNCT__
3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW"
3734be45526SHong Zhang /*@
374b2aa233eSHong Zhang    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3754f7415efSAmlan Barua      parallel layout determined by FFTW
3764f7415efSAmlan Barua 
3774f7415efSAmlan Barua    Collective on Mat
3784f7415efSAmlan Barua 
3794f7415efSAmlan Barua    Input Parameter:
3804f7415efSAmlan Barua .  mat - the matrix
3814f7415efSAmlan Barua 
3824f7415efSAmlan Barua    Output Parameter:
3834f7415efSAmlan Barua +   fin - (optional) input vector of forward FFTW
3844f7415efSAmlan Barua -   fout - (optional) output vector of forward FFTW
3854f7415efSAmlan Barua 
3864f7415efSAmlan Barua   Level: advanced
3874f7415efSAmlan Barua 
3884f7415efSAmlan Barua .seealso: MatCreateFFTW()
3894be45526SHong Zhang @*/
3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
3914be45526SHong Zhang {
3924be45526SHong Zhang   PetscErrorCode ierr;
3934be45526SHong Zhang   PetscFunctionBegin;
3944be45526SHong Zhang   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
3954be45526SHong Zhang   PetscFunctionReturn(0);
3964be45526SHong Zhang }
3974be45526SHong Zhang 
3984f7415efSAmlan Barua EXTERN_C_BEGIN
3994be45526SHong Zhang #undef __FUNCT__
4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW"
4014be45526SHong Zhang PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4024f7415efSAmlan Barua {
4034f7415efSAmlan Barua   PetscErrorCode ierr;
4044f7415efSAmlan Barua   PetscMPIInt    size,rank;
4054f7415efSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
4064f7415efSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
4074f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4089496c9aeSAmlan Barua   PetscInt       N=fft->N;
4094f7415efSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
4104f7415efSAmlan Barua 
4114f7415efSAmlan Barua   PetscFunctionBegin;
4124f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4134f7415efSAmlan Barua   PetscValidType(A,1);
4144f7415efSAmlan Barua 
4154f7415efSAmlan Barua   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
4164f7415efSAmlan Barua   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
4174f7415efSAmlan Barua   if (size == 1){ /* sequential case */
4184f7415efSAmlan Barua   printf("Routine is getting called\n");
4194f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4204f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4214f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4224f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4234f7415efSAmlan Barua #else
4249496c9aeSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
4254f7415efSAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
4264f7415efSAmlan Barua     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
4274f7415efSAmlan Barua #endif
4284f7415efSAmlan Barua   } else {
4294f7415efSAmlan Barua     ptrdiff_t      alloc_local,local_n0,local_0_start;
4309496c9aeSAmlan Barua     ptrdiff_t      local_n1;
4319496c9aeSAmlan Barua     fftw_complex   *data_fout;
4329496c9aeSAmlan Barua     ptrdiff_t      local_1_start;
4339496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4349496c9aeSAmlan Barua     fftw_complex   *data_fin,*data_bout;
4359496c9aeSAmlan Barua #else
4364f7415efSAmlan Barua     double         *data_finr,*data_boutr;
4379496c9aeSAmlan Barua     PetscInt       n1,N1,vsize;
4389496c9aeSAmlan Barua     ptrdiff_t      temp;
4399496c9aeSAmlan Barua #endif
4409496c9aeSAmlan Barua 
4414f7415efSAmlan Barua     switch (ndim){
4424f7415efSAmlan Barua           case 1:
44357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
44457625b84SAmlan Barua                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
44557625b84SAmlan Barua #else
4469496c9aeSAmlan Barua                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
44757625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
44857625b84SAmlan Barua                  if (fin) {
44957625b84SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45057625b84SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
45157625b84SAmlan Barua                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
45257625b84SAmlan Barua                          }
45357625b84SAmlan Barua                  if (fout) {
45457625b84SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
45557625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
45657625b84SAmlan Barua                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
45757625b84SAmlan Barua                          }
45857625b84SAmlan Barua                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
45957625b84SAmlan Barua                  if (bout) {
46057625b84SAmlan Barua                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
46157625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
46257625b84SAmlan Barua                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
46357625b84SAmlan Barua                          }
46457625b84SAmlan Barua           break;
46557625b84SAmlan Barua #endif
4664f7415efSAmlan Barua           case 2:
4674f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4684f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4694f7415efSAmlan Barua                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4704f7415efSAmlan Barua                  if (fin) {
4714f7415efSAmlan Barua                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4724f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4734f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4744f7415efSAmlan Barua                           }
4754f7415efSAmlan Barua                  if (bout) {
4764f7415efSAmlan Barua                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
4774f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
4784f7415efSAmlan Barua                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
4794f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
4804f7415efSAmlan Barua                           }
481*c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0];
48257625b84SAmlan Barua                  if (fout) {
48357625b84SAmlan Barua                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4849496c9aeSAmlan Barua                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48557625b84SAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
48657625b84SAmlan Barua                            }
4874f7415efSAmlan Barua #else
4884f7415efSAmlan Barua       /* Get local size */
4894f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
4904f7415efSAmlan Barua                  if (fin) {
4914f7415efSAmlan Barua                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4924f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4934f7415efSAmlan Barua                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4944f7415efSAmlan Barua                           }
4954f7415efSAmlan Barua                  if (fout) {
4964f7415efSAmlan Barua                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4974f7415efSAmlan Barua                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4984f7415efSAmlan Barua                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4994f7415efSAmlan Barua                            }
5004f7415efSAmlan Barua                  if (bout) {
5014f7415efSAmlan Barua                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5024f7415efSAmlan Barua                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5034f7415efSAmlan Barua                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5044f7415efSAmlan Barua                           }
5054f7415efSAmlan Barua #endif
5064f7415efSAmlan Barua           break;
5074f7415efSAmlan Barua           case 3:
5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5094f7415efSAmlan Barua                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5104f7415efSAmlan Barua                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5114f7415efSAmlan Barua                  if (fin) {
5124f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5134f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5144f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5154f7415efSAmlan Barua                          }
5164f7415efSAmlan Barua                  if (bout) {
5174f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5184f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5194f7415efSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5204f7415efSAmlan Barua                           }
521*c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
52257625b84SAmlan Barua                  if (fout) {
52357625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52457625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52557625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52657625b84SAmlan Barua                           }
5274f7415efSAmlan Barua #else
5280c9b18e4SAmlan Barua                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5290c9b18e4SAmlan Barua                  if (fin) {
5300c9b18e4SAmlan Barua                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5310c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5320c9b18e4SAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5330c9b18e4SAmlan Barua                          }
5340c9b18e4SAmlan Barua                  if (fout) {
5350c9b18e4SAmlan Barua                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5360c9b18e4SAmlan Barua                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5370c9b18e4SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5380c9b18e4SAmlan Barua                          }
5390c9b18e4SAmlan Barua                  if (bout) {
5400c9b18e4SAmlan Barua                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5410c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5420c9b18e4SAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5430c9b18e4SAmlan Barua                          }
5444f7415efSAmlan Barua #endif
5454f7415efSAmlan Barua           break;
5464f7415efSAmlan Barua           default:
5474f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5484f7415efSAmlan Barua                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
5494f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
5504f7415efSAmlan Barua                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5514f7415efSAmlan Barua                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
5524f7415efSAmlan Barua                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5534f7415efSAmlan Barua 
5544f7415efSAmlan Barua                  if (fin) {
5554f7415efSAmlan Barua                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5564f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5574f7415efSAmlan Barua                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5584f7415efSAmlan Barua                         }
5594f7415efSAmlan Barua                  if (bout) {
5604f7415efSAmlan Barua                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
5614f7415efSAmlan Barua                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5629496c9aeSAmlan Barua                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5634f7415efSAmlan Barua                         }
564*c8a8a4f0SAmlan Barua                  //temp = fftw->partial_dim;
565*c8a8a4f0SAmlan Barua                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
566*c8a8a4f0SAmlan Barua                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
56757625b84SAmlan Barua                  if (fout) {
56857625b84SAmlan Barua                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
56957625b84SAmlan Barua                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57057625b84SAmlan Barua                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57157625b84SAmlan Barua                         }
5724f7415efSAmlan Barua #else
5730c9b18e4SAmlan Barua                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
5740c9b18e4SAmlan Barua                 if (fin) {
5750c9b18e4SAmlan Barua                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5760c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5770c9b18e4SAmlan Barua                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5780c9b18e4SAmlan Barua                        }
5790c9b18e4SAmlan Barua                 if (fout) {
5800c9b18e4SAmlan Barua                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5810c9b18e4SAmlan Barua                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5820c9b18e4SAmlan Barua                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5830c9b18e4SAmlan Barua                        }
5840c9b18e4SAmlan Barua                 if (bout) {
5850c9b18e4SAmlan Barua                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5860c9b18e4SAmlan Barua                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5870c9b18e4SAmlan Barua                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5880c9b18e4SAmlan Barua                        }
5894f7415efSAmlan Barua #endif
5904f7415efSAmlan Barua             break;
5914f7415efSAmlan Barua           }
5924f7415efSAmlan Barua   }
5934f7415efSAmlan Barua   PetscFunctionReturn(0);
5944f7415efSAmlan Barua }
5954f7415efSAmlan Barua EXTERN_C_END
5964f7415efSAmlan Barua 
597c92418ccSAmlan Barua #undef __FUNCT__
598b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
599b2b77a04SHong Zhang /*
600b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
601b2b77a04SHong Zhang      parallel layout determined by FFTW
602b2b77a04SHong Zhang 
603b2b77a04SHong Zhang    Collective on Mat
604b2b77a04SHong Zhang 
605b2b77a04SHong Zhang    Input Parameter:
606b2b77a04SHong Zhang .  mat - the matrix
607b2b77a04SHong Zhang 
608b2b77a04SHong Zhang    Output Parameter:
609b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
610b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
611b2b77a04SHong Zhang 
612b2b77a04SHong Zhang   Level: advanced
613b2b77a04SHong Zhang 
614b2b77a04SHong Zhang .seealso: MatCreateFFTW()
615b2b77a04SHong Zhang */
616b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
617b2b77a04SHong Zhang {
618b2b77a04SHong Zhang   PetscErrorCode ierr;
619b2b77a04SHong Zhang   PetscMPIInt    size,rank;
620b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
621b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
622c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6239496c9aeSAmlan Barua   PetscInt       N=fft->N;
624e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
625b2b77a04SHong Zhang 
626b2b77a04SHong Zhang   PetscFunctionBegin;
627b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
628b2b77a04SHong Zhang   PetscValidType(A,1);
629b2b77a04SHong Zhang 
630b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
631b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
632b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
633e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
634b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
635b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
636e5d7f247SAmlan Barua #else
637e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
638e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
639e81bb599SAmlan Barua #endif
640b2b77a04SHong Zhang   } else {        /* mpi case */
641b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
6426882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
643b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
6449496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
64551d1eed7SAmlan Barua     double         *data_finr ;
646b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
6479496c9aeSAmlan Barua     PetscInt       vsize,n1,N1;
6489496c9aeSAmlan Barua #endif
6499496c9aeSAmlan Barua 
650c92418ccSAmlan Barua //    PetscInt ctr;
651c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
652c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
653c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
65411902ff2SHong Zhang 
655c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
656f76f76beSAmlan Barua //        {k
657c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
658c92418ccSAmlan Barua //       }
659b2b77a04SHong Zhang 
660f76f76beSAmlan Barua 
661f76f76beSAmlan Barua 
662b2b77a04SHong Zhang     switch (ndim){
663b2b77a04SHong Zhang     case 1:
6646882372aSHong Zhang       /* Get local size */
665e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
666c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
6676882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
6689496c9aeSAmlan Barua       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
6696882372aSHong Zhang       if (fin) {
6706882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6716882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6726882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6736882372aSHong Zhang       }
6746882372aSHong Zhang       if (fout) {
6756882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
67657625b84SAmlan Barua         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6776882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6786882372aSHong Zhang       }
679b2b77a04SHong Zhang       break;
680b2b77a04SHong Zhang     case 2:
6813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6823c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6833c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6843c3a9c75SAmlan Barua       if (fin) {
6853c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
68654dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6873c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
68809dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
6893c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6903c3a9c75SAmlan Barua       }
69157625b84SAmlan Barua       n1 = 2*local_n1*(dim[0]);
69257625b84SAmlan Barua       //n1 = 2*local_n1*dim[1];
69357625b84SAmlan Barua       printf("The values are %ld\n",local_n1);
6943c3a9c75SAmlan Barua       if (fout) {
69509dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
69609dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6973c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6983c3a9c75SAmlan Barua       }
699f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
7003c3a9c75SAmlan Barua 
7013c3a9c75SAmlan Barua #else
702b2b77a04SHong Zhang       /* Get local size */
70364657d84SAmlan Barua      //printf("Hope this does not come here");
704b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
705b2b77a04SHong Zhang       if (fin) {
706b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
707b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
708b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
709b2b77a04SHong Zhang       }
710b2b77a04SHong Zhang       if (fout) {
711b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
712b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
713b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
714b2b77a04SHong Zhang       }
71564657d84SAmlan Barua      //printf("Hope this does not come here");
7163c3a9c75SAmlan Barua #endif
717b2b77a04SHong Zhang       break;
718b2b77a04SHong Zhang     case 3:
7196882372aSHong Zhang       /* Get local size */
7203c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
72151d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
72251d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
72351d1eed7SAmlan Barua       if (fin) {
72451d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
72551d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
72651d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
72751d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
72851d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
72951d1eed7SAmlan Barua       }
73057625b84SAmlan Barua       printf("The value is %ld",local_n1);
73157625b84SAmlan Barua       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
73251d1eed7SAmlan Barua       if (fout) {
73351d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
73451d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
73551d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
73651d1eed7SAmlan Barua       }
73751d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
73851d1eed7SAmlan Barua 
73951d1eed7SAmlan Barua 
7403c3a9c75SAmlan Barua #else
7416882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
74211902ff2SHong Zhang //      printf("The quantity n is %d",n);
7436882372aSHong Zhang       if (fin) {
7446882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7456882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7466882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7476882372aSHong Zhang       }
7486882372aSHong Zhang       if (fout) {
7496882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7506882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
7516882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7526882372aSHong Zhang       }
7533c3a9c75SAmlan Barua #endif
754b2b77a04SHong Zhang       break;
755b2b77a04SHong Zhang     default:
7566882372aSHong Zhang       /* Get local size */
7573c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
758b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
759b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
760b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
761b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
762b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
763b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
764b3a17365SAmlan Barua       if (fin) {
765b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
766b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
767b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
768b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
769b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
770b3a17365SAmlan Barua       }
77157625b84SAmlan Barua       printf("The value is %ld. Global length is %d \n",local_n1,N1);
77257625b84SAmlan Barua       temp = fftw->partial_dim;
77357625b84SAmlan Barua       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
77457625b84SAmlan Barua 
77557625b84SAmlan Barua       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
776b3a17365SAmlan Barua       if (fout) {
777b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
77857625b84SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
779b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
780b3a17365SAmlan Barua       }
781b3a17365SAmlan Barua 
7823c3a9c75SAmlan Barua #else
783c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
78411902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
78511902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
78611902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
78711902ff2SHong Zhang //      for(i=0;i<ndim;i++)
78811902ff2SHong Zhang //         {
78911902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
79011902ff2SHong Zhang //         }
79111902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
79211902ff2SHong Zhang //      printf("The quantity n is %d",n);
7936882372aSHong Zhang       if (fin) {
7946882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
7956882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
7966882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
7976882372aSHong Zhang       }
7986882372aSHong Zhang       if (fout) {
7996882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
8006882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
8016882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
8026882372aSHong Zhang       }
8033c3a9c75SAmlan Barua #endif
804b2b77a04SHong Zhang       break;
805b2b77a04SHong Zhang     }
806b2b77a04SHong Zhang   }
80754dd5118SAmlan Barua //  if (fin){
80854dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
80954dd5118SAmlan Barua //  }
81054dd5118SAmlan Barua //  if (fout){
81154dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
81254dd5118SAmlan Barua //  }
813b2b77a04SHong Zhang   PetscFunctionReturn(0);
814b2b77a04SHong Zhang }
815b2b77a04SHong Zhang 
816b2b77a04SHong Zhang #undef __FUNCT__
8173c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
8183c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
8193c3a9c75SAmlan Barua {
8203c3a9c75SAmlan Barua   PetscErrorCode ierr;
8213c3a9c75SAmlan Barua   PetscFunctionBegin;
8223c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8233c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8243c3a9c75SAmlan Barua }
82554efbe56SHong Zhang 
826b2b77a04SHong Zhang /*
8279496c9aeSAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
8289496c9aeSAmlan Barua       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
8299496c9aeSAmlan Barua       changing dimension.
8303c3a9c75SAmlan Barua   Input A, x, y
8313c3a9c75SAmlan Barua   A - FFTW matrix
83254dd5118SAmlan Barua   x - user data
833b2b77a04SHong Zhang   Options Database Keys:
834b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
835b2b77a04SHong Zhang 
836b2b77a04SHong Zhang    Level: intermediate
837b2b77a04SHong Zhang 
838b2b77a04SHong Zhang */
8393c3a9c75SAmlan Barua 
8403c3a9c75SAmlan Barua EXTERN_C_BEGIN
8413c3a9c75SAmlan Barua #undef __FUNCT__
8423c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
8433c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
8443c3a9c75SAmlan Barua {
8453c3a9c75SAmlan Barua   PetscErrorCode ierr;
8463c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
8473c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
8483c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
8499496c9aeSAmlan Barua   PetscInt       N=fft->N;
850b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
8519496c9aeSAmlan Barua   PetscInt       low;
8529496c9aeSAmlan Barua   PetscInt       rank,size;
8533c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
8549496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
8553c3a9c75SAmlan Barua   VecScatter     vecscat;
8563c3a9c75SAmlan Barua   IS             list1,list2;
8579496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
8589496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
8599496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
8609496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
8619496c9aeSAmlan Barua   ptrdiff_t      temp;
8629496c9aeSAmlan Barua #endif
8633c3a9c75SAmlan Barua 
864f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
865f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
8663c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
8673c3a9c75SAmlan Barua 
868e81bb599SAmlan Barua   if (size==1)
869e81bb599SAmlan Barua     {
8707536937bSAmlan Barua /*     switch (ndim){
871e81bb599SAmlan Barua      case 1:
872e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
873e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
874e81bb599SAmlan Barua              {
875e81bb599SAmlan Barua               indx1[i] = i;
876e81bb599SAmlan Barua              }
877e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
878e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
879e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
882b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
883b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
884e81bb599SAmlan Barua      break;
885e81bb599SAmlan Barua 
886e81bb599SAmlan Barua      case 2:
887e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
888e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
889e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
890e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
891e81bb599SAmlan Barua              }
892e81bb599SAmlan Barua           }
893e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
894e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
895e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
898b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
899b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
900e81bb599SAmlan Barua           break;
901e81bb599SAmlan Barua      case 3:
902e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
903e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
904e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
905e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
906e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
907e81bb599SAmlan Barua                 }
908e81bb599SAmlan Barua              }
909e81bb599SAmlan Barua           }
910e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
911e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
912e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
915b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
916b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
917e81bb599SAmlan Barua           break;
918e81bb599SAmlan Barua      default:
9197536937bSAmlan Barua */
9209496c9aeSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
9216971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9226971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9236971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9246971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
925b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
9267536937bSAmlan Barua  //         break;
9277536937bSAmlan Barua   //    }
928e81bb599SAmlan Barua     }
929e81bb599SAmlan Barua 
930e81bb599SAmlan Barua  else{
931e81bb599SAmlan Barua 
9323c3a9c75SAmlan Barua  switch (ndim){
9333c3a9c75SAmlan Barua  case 1:
93464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
93564657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
93664657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
93764657d84SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
93864657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
93964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
94164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
94264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
94364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
94464657d84SAmlan Barua #else
945e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
94664657d84SAmlan Barua #endif
9473c3a9c75SAmlan Barua  break;
9483c3a9c75SAmlan Barua  case 2:
949bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
950bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
951bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
952bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
953bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
954bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959bd59e6c6SAmlan Barua #else
9603c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9613c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9629496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9639496c9aeSAmlan Barua       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9649496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
9659496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9663c3a9c75SAmlan Barua 
9673c3a9c75SAmlan Barua       if (dim[1]%2==0)
9683c3a9c75SAmlan Barua         NM = dim[1]+2;
9693c3a9c75SAmlan Barua       else
9703c3a9c75SAmlan Barua         NM = dim[1]+1;
9713c3a9c75SAmlan Barua 
9723c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
9733c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
9743c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
9753c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
9765b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
9773c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
9783c3a9c75SAmlan Barua         }
9793c3a9c75SAmlan Barua      }
9803c3a9c75SAmlan Barua 
9813c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9823c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9833c3a9c75SAmlan Barua 
984f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
985f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
988b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
989b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
990b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
991b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
992bd59e6c6SAmlan Barua #endif
9939496c9aeSAmlan Barua  break;
9943c3a9c75SAmlan Barua 
9953c3a9c75SAmlan Barua  case 3:
996bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
997bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
998bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
999bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1000bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1001bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1002bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1003bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1006bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1007bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1008bd59e6c6SAmlan Barua #else
100951d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
101051d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
101151d1eed7SAmlan Barua 
10129496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
10139496c9aeSAmlan Barua //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
10149496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
10159496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
101651d1eed7SAmlan Barua 
101751d1eed7SAmlan Barua       if (dim[2]%2==0)
101851d1eed7SAmlan Barua         NM = dim[2]+2;
101951d1eed7SAmlan Barua       else
102051d1eed7SAmlan Barua         NM = dim[2]+1;
102151d1eed7SAmlan Barua 
102251d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
102351d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
102451d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
102551d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
102651d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
102751d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
102851d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
102951d1eed7SAmlan Barua             }
103051d1eed7SAmlan Barua          }
103151d1eed7SAmlan Barua       }
103251d1eed7SAmlan Barua 
103351d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
103451d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
103551d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
103651d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103751d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
103851d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1039b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1040b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1041b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1042b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1043bd59e6c6SAmlan Barua #endif
10449496c9aeSAmlan Barua  break;
10453c3a9c75SAmlan Barua 
10463c3a9c75SAmlan Barua  default:
1047bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1048bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1049bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1050bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1051bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1052bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057bd59e6c6SAmlan Barua #else
1058e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1059e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1060e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1061e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1062e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1063e5d7f247SAmlan Barua 
1064e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
1065e5d7f247SAmlan Barua 
1066e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1067e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1068e5d7f247SAmlan Barua 
1069e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
1070ba6e06d0SAmlan Barua         NM = 2;
1071e5d7f247SAmlan Barua       else
1072ba6e06d0SAmlan Barua         NM = 1;
1073e5d7f247SAmlan Barua 
10746971673cSAmlan Barua       j = low;
10756971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
10766971673cSAmlan Barua          {
10776971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
10786971673cSAmlan Barua           indx2[i] = j;
10796971673cSAmlan Barua           if (k%dim[ndim-1]==0)
10806971673cSAmlan Barua             { j+=NM;}
10816971673cSAmlan Barua           j++;
10826971673cSAmlan Barua          }
10836971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10846971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10856971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
10866971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10876971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10886971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1089b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1090b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1091b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1092b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1093bd59e6c6SAmlan Barua #endif
10949496c9aeSAmlan Barua  break;
10953c3a9c75SAmlan Barua   }
1096e81bb599SAmlan Barua  }
10973c3a9c75SAmlan Barua 
10983c3a9c75SAmlan Barua  return 0;
10993c3a9c75SAmlan Barua }
11003c3a9c75SAmlan Barua EXTERN_C_END
11013c3a9c75SAmlan Barua 
11023c3a9c75SAmlan Barua #undef __FUNCT__
11033c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
11043c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
11053c3a9c75SAmlan Barua {
11063c3a9c75SAmlan Barua   PetscErrorCode ierr;
11073c3a9c75SAmlan Barua   PetscFunctionBegin;
11083c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
11093c3a9c75SAmlan Barua   PetscFunctionReturn(0);
11103c3a9c75SAmlan Barua }
111154efbe56SHong Zhang 
11125b3e41ffSAmlan Barua /*
11135b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
11145b3e41ffSAmlan Barua   Input A, x, y
11155b3e41ffSAmlan Barua   A - FFTW matrix
11165b3e41ffSAmlan Barua   x - FFTW vector
11175b3e41ffSAmlan Barua   y - PETSc vector that user can read
11185b3e41ffSAmlan Barua   Options Database Keys:
11195b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11205b3e41ffSAmlan Barua 
11215b3e41ffSAmlan Barua    Level: intermediate
11225b3e41ffSAmlan Barua 
11233c3a9c75SAmlan Barua */
11243c3a9c75SAmlan Barua 
11253c3a9c75SAmlan Barua EXTERN_C_BEGIN
11263c3a9c75SAmlan Barua #undef __FUNCT__
11275b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
11285b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
11295b3e41ffSAmlan Barua {
11305b3e41ffSAmlan Barua   PetscErrorCode ierr;
11315b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
11325b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
11335b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
11349496c9aeSAmlan Barua   PetscInt       N=fft->N;
1135b3655f67SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
11369496c9aeSAmlan Barua   PetscInt       low;
11379496c9aeSAmlan Barua   PetscInt       rank,size;
11385b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
11399496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11405b3e41ffSAmlan Barua   VecScatter     vecscat;
11415b3e41ffSAmlan Barua   IS             list1,list2;
11429496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11439496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
11449496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
11459496c9aeSAmlan Barua   PetscInt       N1, n1 ,NM;
11469496c9aeSAmlan Barua   ptrdiff_t      temp;
11479496c9aeSAmlan Barua #endif
11489496c9aeSAmlan Barua 
11495b3e41ffSAmlan Barua 
11505b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
11515b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1152b3655f67SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
11535b3e41ffSAmlan Barua 
1154e81bb599SAmlan Barua   if (size==1){
11557536937bSAmlan Barua /*
11565b3e41ffSAmlan Barua     switch (ndim){
11575b3e41ffSAmlan Barua     case 1:
1158e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1159e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
1160e81bb599SAmlan Barua              {
1161e81bb599SAmlan Barua               indx1[i] = i;
1162e81bb599SAmlan Barua              }
1163e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1164e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1165e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1168b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1169b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
1170e81bb599SAmlan Barua           break;
1171e81bb599SAmlan Barua 
1172e81bb599SAmlan Barua     case 2:
1173e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1174e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
1175e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
1176e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
1177e81bb599SAmlan Barua              }
1178e81bb599SAmlan Barua           }
1179e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1180e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1181e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1183e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1184b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1185b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1186e81bb599SAmlan Barua          break;
1187e81bb599SAmlan Barua     case 3:
1188e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1189e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
1190e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
1191e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
1192e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1193e81bb599SAmlan Barua                }
1194e81bb599SAmlan Barua             }
1195e81bb599SAmlan Barua          }
1196e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1197e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1198e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1201b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1202b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
1203e81bb599SAmlan Barua          break;
1204e81bb599SAmlan Barua     default:
12057536937bSAmlan Barua */
1206b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
12076971673cSAmlan Barua     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
12086971673cSAmlan Barua     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
12096971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12106971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12116971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1212b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
12137536937bSAmlan Barua   //       break;
12147536937bSAmlan Barua    // }
1215e81bb599SAmlan Barua   }
1216e81bb599SAmlan Barua   else{
1217e81bb599SAmlan Barua 
1218e81bb599SAmlan Barua  switch (ndim){
1219e81bb599SAmlan Barua  case 1:
122064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
122164657d84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
12229496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
12239496c9aeSAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
122464657d84SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
122564657d84SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
122664657d84SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
122764657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122864657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122964657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
123064657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
123164657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
123264657d84SAmlan Barua #else
12336a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
123464657d84SAmlan Barua #endif
12355b3e41ffSAmlan Barua  break;
12365b3e41ffSAmlan Barua  case 2:
1237bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1238bd59e6c6SAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1239bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1240bd59e6c6SAmlan Barua       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1241bd59e6c6SAmlan Barua       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1242bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1245bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1246bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1247bd59e6c6SAmlan Barua #else
12485b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
12495b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
12505b3e41ffSAmlan Barua 
12519496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
12529496c9aeSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
12535b3e41ffSAmlan Barua 
12545b3e41ffSAmlan Barua       if (dim[1]%2==0)
12555b3e41ffSAmlan Barua         NM = dim[1]+2;
12565b3e41ffSAmlan Barua       else
12575b3e41ffSAmlan Barua         NM = dim[1]+1;
12585b3e41ffSAmlan Barua 
12595b3e41ffSAmlan Barua       for (i=0;i<local_n0;i++){
12605b3e41ffSAmlan Barua          for (j=0;j<dim[1];j++){
12615b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
12625b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
12635b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
12645b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
12655b3e41ffSAmlan Barua          }
12665b3e41ffSAmlan Barua        }
12675b3e41ffSAmlan Barua 
12685b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
12695b3e41ffSAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
12705b3e41ffSAmlan Barua 
12715b3e41ffSAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
12725b3e41ffSAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12735b3e41ffSAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
12745b3e41ffSAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1275b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1276b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1277b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1278b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1279bd59e6c6SAmlan Barua #endif
12809496c9aeSAmlan Barua  break;
12815b3e41ffSAmlan Barua  case 3:
1282bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1283bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1284bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1285bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1286bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1287bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1288bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1289bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1290bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1291bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1292bd59e6c6SAmlan Barua #else
1293bd59e6c6SAmlan Barua 
129451d1eed7SAmlan Barua        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
129551d1eed7SAmlan Barua        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
129651d1eed7SAmlan Barua 
12979496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
12989496c9aeSAmlan Barua //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
12999496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
13009496c9aeSAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
130151d1eed7SAmlan Barua 
130251d1eed7SAmlan Barua        if (dim[2]%2==0)
130351d1eed7SAmlan Barua         NM = dim[2]+2;
130451d1eed7SAmlan Barua        else
130551d1eed7SAmlan Barua         NM = dim[2]+1;
130651d1eed7SAmlan Barua 
130751d1eed7SAmlan Barua        for (i=0;i<local_n0;i++){
130851d1eed7SAmlan Barua           for (j=0;j<dim[1];j++){
130951d1eed7SAmlan Barua              for (k=0;k<dim[2];k++){
131051d1eed7SAmlan Barua                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
131151d1eed7SAmlan Barua                 tempindx1 = i*dim[1]*NM + j*NM + k;
131251d1eed7SAmlan Barua                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
131351d1eed7SAmlan Barua                 indx2[tempindx]=low+tempindx1;
131451d1eed7SAmlan Barua              }
131551d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
131651d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
131751d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
131851d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
131951d1eed7SAmlan Barua           }
132051d1eed7SAmlan Barua        }
132151d1eed7SAmlan Barua 
132251d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
132351d1eed7SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
132451d1eed7SAmlan Barua 
132551d1eed7SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
132651d1eed7SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
132751d1eed7SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
132851d1eed7SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1329b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1330b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1331b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1332b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1333bd59e6c6SAmlan Barua #endif
13349496c9aeSAmlan Barua  break;
13355b3e41ffSAmlan Barua  default:
1336bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1337bd59e6c6SAmlan Barua        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1338bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1339bd59e6c6SAmlan Barua        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1340bd59e6c6SAmlan Barua       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1341bd59e6c6SAmlan Barua       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1342bd59e6c6SAmlan Barua        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1343bd59e6c6SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1344bd59e6c6SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345bd59e6c6SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1346bd59e6c6SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1347bd59e6c6SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1348bd59e6c6SAmlan Barua #else
1349ba6e06d0SAmlan Barua        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1350ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1351ba6e06d0SAmlan Barua        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1352ba6e06d0SAmlan Barua        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1353ba6e06d0SAmlan Barua        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1354ba6e06d0SAmlan Barua 
1355ba6e06d0SAmlan Barua        partial_dim = fftw->partial_dim;
1356ba6e06d0SAmlan Barua 
1357ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1358ba6e06d0SAmlan Barua        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1359ba6e06d0SAmlan Barua 
1360ba6e06d0SAmlan Barua        if (dim[ndim-1]%2==0)
1361ba6e06d0SAmlan Barua          NM = 2;
1362ba6e06d0SAmlan Barua        else
1363ba6e06d0SAmlan Barua          NM = 1;
1364ba6e06d0SAmlan Barua 
1365ba6e06d0SAmlan Barua        j = low;
1366ba6e06d0SAmlan Barua        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1367ba6e06d0SAmlan Barua           {
1368ba6e06d0SAmlan Barua            indx1[i] = local_0_start*partial_dim + i;
1369ba6e06d0SAmlan Barua            indx2[i] = j;
1370ba6e06d0SAmlan Barua            if (k%dim[ndim-1]==0)
1371ba6e06d0SAmlan Barua              { j+=NM;}
1372ba6e06d0SAmlan Barua            j++;
1373ba6e06d0SAmlan Barua           }
1374ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1375ba6e06d0SAmlan Barua        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1376ba6e06d0SAmlan Barua 
1377ba6e06d0SAmlan Barua        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1378ba6e06d0SAmlan Barua        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379ba6e06d0SAmlan Barua        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380ba6e06d0SAmlan Barua        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1381b223cc97SAmlan Barua        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1382b223cc97SAmlan Barua        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1383b223cc97SAmlan Barua        ierr = PetscFree(indx1);CHKERRQ(ierr);
1384b223cc97SAmlan Barua        ierr = PetscFree(indx2);CHKERRQ(ierr);
1385bd59e6c6SAmlan Barua #endif
13869496c9aeSAmlan Barua  break;
13875b3e41ffSAmlan Barua  }
1388e81bb599SAmlan Barua  }
13895b3e41ffSAmlan Barua  return 0;
13905b3e41ffSAmlan Barua }
13915b3e41ffSAmlan Barua EXTERN_C_END
13925b3e41ffSAmlan Barua 
13935b3e41ffSAmlan Barua EXTERN_C_BEGIN
13945b3e41ffSAmlan Barua #undef __FUNCT__
13953c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
13963c3a9c75SAmlan Barua /*
13973c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
13983c3a9c75SAmlan Barua   via the external package FFTW
13993c3a9c75SAmlan Barua   Options Database Keys:
14003c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
14013c3a9c75SAmlan Barua 
14023c3a9c75SAmlan Barua    Level: intermediate
14033c3a9c75SAmlan Barua 
14043c3a9c75SAmlan Barua */
14053c3a9c75SAmlan Barua 
1406b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1407b2b77a04SHong Zhang {
1408b2b77a04SHong Zhang   PetscErrorCode ierr;
1409b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1410b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1411b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1412b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1413b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1414b2b77a04SHong Zhang   PetscBool      flg;
14159496c9aeSAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,n1;
141611902ff2SHong Zhang   PetscMPIInt    size,rank;
14179496c9aeSAmlan Barua   ptrdiff_t      *pdim;
14189496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
14199496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14209496c9aeSAmlan Barua    ptrdiff_t     temp;
14219496c9aeSAmlan Barua    PetscInt      N1;
14229496c9aeSAmlan Barua #endif
14239496c9aeSAmlan Barua 
1424b2b77a04SHong Zhang 
1425b2b77a04SHong Zhang   PetscFunctionBegin;
1426b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
142711902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
142854dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1429c92418ccSAmlan Barua 
143011902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
143111902ff2SHong Zhang   pdim[0] = dim[0];
143211902ff2SHong Zhang   for (ctr=1;ctr<ndim;ctr++)
143311902ff2SHong Zhang      {
14346882372aSHong Zhang           partial_dim *= dim[ctr];
143511902ff2SHong Zhang           pdim[ctr] = dim[ctr];
14366882372aSHong Zhang      }
14373c3a9c75SAmlan Barua 
1438b2b77a04SHong Zhang   if (size == 1) {
1439e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1440b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1441b2b77a04SHong Zhang     n = N;
1442e81bb599SAmlan Barua #else
14439496c9aeSAmlan Barua     PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1444e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1445e81bb599SAmlan Barua     n = tot_dim;
1446e81bb599SAmlan Barua #endif
1447e81bb599SAmlan Barua 
1448b2b77a04SHong Zhang   } else {
14499496c9aeSAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1450b2b77a04SHong Zhang     switch (ndim){
1451b2b77a04SHong Zhang     case 1:
14523c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
14533c3a9c75SAmlan Barua    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1454e5d7f247SAmlan Barua #else
14559496c9aeSAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
14566882372aSHong Zhang       n = (PetscInt)local_n0;
14579496c9aeSAmlan Barua       n1 = (PetscInt) local_n1;
14589496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1459e5d7f247SAmlan Barua #endif
1460b2b77a04SHong Zhang       break;
1461b2b77a04SHong Zhang     case 2:
14625b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1463b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1464b2b77a04SHong Zhang       /*
1465b2b77a04SHong Zhang        PetscMPIInt    rank;
1466b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1467b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1468b2b77a04SHong Zhang        */
1469b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1470b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
14715b3e41ffSAmlan Barua #else
14725b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
14735b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1474*c8a8a4f0SAmlan Barua //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1475*c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1476*c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
14775b3e41ffSAmlan Barua #endif
1478b2b77a04SHong Zhang       break;
1479b2b77a04SHong Zhang     case 3:
148051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
148151d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
14826882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
14836882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
148451d1eed7SAmlan Barua #else
148551d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
148651d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1487*c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1488*c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1489*c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
149051d1eed7SAmlan Barua #endif
1491b2b77a04SHong Zhang       break;
1492b2b77a04SHong Zhang     default:
1493b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
149411902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
14956882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
14966882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1497b3a17365SAmlan Barua #else
1498b3a17365SAmlan Barua       temp = pdim[ndim-1];
1499b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
1500b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1501b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1502b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1503b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1504*c8a8a4f0SAmlan Barua //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1505*c8a8a4f0SAmlan Barua //      n1 = 2*local_n1*temp;
1506*c8a8a4f0SAmlan Barua //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1507*c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1508b3a17365SAmlan Barua #endif
1509b2b77a04SHong Zhang       break;
1510b2b77a04SHong Zhang     }
1511b2b77a04SHong Zhang   }
1512b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1513b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1514b2b77a04SHong Zhang   fft->data = (void*)fftw;
1515b2b77a04SHong Zhang 
1516b2b77a04SHong Zhang   fft->n           = n;
1517c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1518e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1519c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1520b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1521c92418ccSAmlan Barua 
1522b2b77a04SHong Zhang   fftw->p_forward  = 0;
1523b2b77a04SHong Zhang   fftw->p_backward = 0;
1524b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1525b2b77a04SHong Zhang 
1526b2b77a04SHong Zhang   if (size == 1){
1527b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1528b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1529b2b77a04SHong Zhang   } else {
1530b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1531b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1532b2b77a04SHong Zhang   }
1533b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
15346a506ed8SAmlan Barua   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
15354be45526SHong Zhang   //A->ops->getvecs       = MatGetVecs_FFTW;
1536b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
15374be45526SHong Zhang   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
15383c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
15395b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1540b2b77a04SHong Zhang 
1541b2b77a04SHong Zhang   /* get runtime options */
1542b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1543b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1544b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1545b2b77a04SHong Zhang   PetscOptionsEnd();
1546b2b77a04SHong Zhang   PetscFunctionReturn(0);
1547b2b77a04SHong Zhang }
1548b2b77a04SHong Zhang EXTERN_C_END
15493c3a9c75SAmlan Barua 
15503c3a9c75SAmlan Barua 
15513c3a9c75SAmlan Barua 
15523c3a9c75SAmlan Barua 
1553