xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b223cc977cbead8f3dd4e85365fb7a637b32daab)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
4058a912c5SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
41b9ae062cSHong Zhang 
4258a912c5SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
4358a912c5SAmlan Barua //#endif
44b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
45b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
46b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
47b2b77a04SHong Zhang     switch (ndim){
48b2b77a04SHong Zhang     case 1:
4958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
50b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5158a912c5SAmlan Barua #else
5258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
5358a912c5SAmlan Barua #endif
54b2b77a04SHong Zhang       break;
55b2b77a04SHong Zhang     case 2:
5658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
57b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
5858a912c5SAmlan Barua #else
5958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6058a912c5SAmlan Barua #endif
61b2b77a04SHong Zhang       break;
62b2b77a04SHong Zhang     case 3:
6358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
64b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6558a912c5SAmlan Barua #else
6658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
6758a912c5SAmlan Barua #endif
68b2b77a04SHong Zhang       break;
69b2b77a04SHong Zhang     default:
7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
71b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7258a912c5SAmlan Barua #else
7358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
7458a912c5SAmlan Barua #endif
75b2b77a04SHong Zhang       break;
76b2b77a04SHong Zhang     }
77b2b77a04SHong Zhang     fftw->finarray  = x_array;
78b2b77a04SHong Zhang     fftw->foutarray = y_array;
79b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
80b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
81b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
82b2b77a04SHong Zhang   } else { /* use existing plan */
83b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
84b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
85b2b77a04SHong Zhang     } else {
86b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
87b2b77a04SHong Zhang     }
88b2b77a04SHong Zhang   }
89b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
90b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
91b2b77a04SHong Zhang   PetscFunctionReturn(0);
92b2b77a04SHong Zhang }
93b2b77a04SHong Zhang 
94b2b77a04SHong Zhang #undef __FUNCT__
95b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
96b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
97b2b77a04SHong Zhang {
98b2b77a04SHong Zhang   PetscErrorCode ierr;
99b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
100b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
101b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
102b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
103b2b77a04SHong Zhang 
104b2b77a04SHong Zhang   PetscFunctionBegin;
105e81bb599SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
106e81bb599SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
107e81bb599SAmlan Barua //#endif
108b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
109b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
110b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
111b2b77a04SHong Zhang     switch (ndim){
112b2b77a04SHong Zhang     case 1:
11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
114b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
11558a912c5SAmlan Barua #else
116e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     case 2:
12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12258a912c5SAmlan Barua #else
123e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
12458a912c5SAmlan Barua #endif
125b2b77a04SHong Zhang       break;
126b2b77a04SHong Zhang     case 3:
12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
12958a912c5SAmlan Barua #else
130e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13158a912c5SAmlan Barua #endif
132b2b77a04SHong Zhang       break;
133b2b77a04SHong Zhang     default:
13458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
135b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
13658a912c5SAmlan Barua #else
137e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
13858a912c5SAmlan Barua #endif
139b2b77a04SHong Zhang       break;
140b2b77a04SHong Zhang     }
141b2b77a04SHong Zhang     fftw->binarray  = x_array;
142b2b77a04SHong Zhang     fftw->boutarray = y_array;
143b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
144b2b77a04SHong Zhang   } else { /* use existing plan */
145b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
146b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
147b2b77a04SHong Zhang     } else {
148b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
149b2b77a04SHong Zhang     }
150b2b77a04SHong Zhang   }
151b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
152b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
153b2b77a04SHong Zhang   PetscFunctionReturn(0);
154b2b77a04SHong Zhang }
155b2b77a04SHong Zhang 
156b2b77a04SHong Zhang #undef __FUNCT__
157b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
158b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
159b2b77a04SHong Zhang {
160b2b77a04SHong Zhang   PetscErrorCode ierr;
161b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
162b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
163b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
164c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
165b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
166c92418ccSAmlan Barua // PetscInt ctr;
167c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
168c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
169c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
170c92418ccSAmlan Barua 
171c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
172c92418ccSAmlan Barua //     {
173c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
174c92418ccSAmlan Barua //     }
175b2b77a04SHong Zhang 
176b2b77a04SHong Zhang   PetscFunctionBegin;
1775b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
1785b3e41ffSAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
1795b3e41ffSAmlan Barua //#endif
180c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
181c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
18211902ff2SHong Zhang 
183b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
184b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
185b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
186b2b77a04SHong Zhang     switch (ndim){
187b2b77a04SHong Zhang     case 1:
1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1903c3a9c75SAmlan Barua #endif
191b2b77a04SHong Zhang       break;
192b2b77a04SHong Zhang     case 2:
1933c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
194b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1953c3a9c75SAmlan Barua #else
1965b3e41ffSAmlan Barua       printf("The code comes here \n");
1973c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1983c3a9c75SAmlan Barua #endif
199b2b77a04SHong Zhang       break;
200b2b77a04SHong Zhang     case 3:
2013c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
202b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2033c3a9c75SAmlan Barua #else
20451d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2053c3a9c75SAmlan Barua #endif
206b2b77a04SHong Zhang       break;
207b2b77a04SHong Zhang     default:
2083c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
209c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2103c3a9c75SAmlan Barua #else
2113c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2123c3a9c75SAmlan Barua #endif
21311902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
214b2b77a04SHong Zhang       break;
215b2b77a04SHong Zhang     }
216b2b77a04SHong Zhang     fftw->finarray  = x_array;
217b2b77a04SHong Zhang     fftw->foutarray = y_array;
218b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
219b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
220b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
221b2b77a04SHong Zhang   } else { /* use existing plan */
222b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
223b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
224b2b77a04SHong Zhang     } else {
225b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
226b2b77a04SHong Zhang     }
227b2b77a04SHong Zhang   }
228b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
229b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
230b2b77a04SHong Zhang   PetscFunctionReturn(0);
231b2b77a04SHong Zhang }
232b2b77a04SHong Zhang 
233b2b77a04SHong Zhang #undef __FUNCT__
234b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
235b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
236b2b77a04SHong Zhang {
237b2b77a04SHong Zhang   PetscErrorCode ierr;
238b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
239b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
240b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
241c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
242b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
243c92418ccSAmlan Barua //  PetscInt       ctr;
244c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
245c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
246c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
247c92418ccSAmlan Barua 
248c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
249c92418ccSAmlan Barua //     {
250c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
251c92418ccSAmlan Barua //     }
252b2b77a04SHong Zhang 
253b2b77a04SHong Zhang   PetscFunctionBegin;
2543c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
2553c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
2563c3a9c75SAmlan Barua //#endif
257c92418ccSAmlan Barua //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
258c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW?
259c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
26011902ff2SHong Zhang 
261b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
262b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
263b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
264b2b77a04SHong Zhang     switch (ndim){
265b2b77a04SHong Zhang     case 1:
2663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
267b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2683c3a9c75SAmlan Barua #endif
269b2b77a04SHong Zhang       break;
270b2b77a04SHong Zhang     case 2:
2713c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
272b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2733c3a9c75SAmlan Barua #else
2743c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2753c3a9c75SAmlan Barua #endif
276b2b77a04SHong Zhang       break;
277b2b77a04SHong Zhang     case 3:
2783c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
279b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2803c3a9c75SAmlan Barua #else
2813c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2823c3a9c75SAmlan Barua #endif
283b2b77a04SHong Zhang       break;
284b2b77a04SHong Zhang     default:
2853c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
286c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2873c3a9c75SAmlan Barua #else
2883c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2893c3a9c75SAmlan Barua #endif
290c92418ccSAmlan Barua //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
291b2b77a04SHong Zhang       break;
292b2b77a04SHong Zhang     }
293b2b77a04SHong Zhang     fftw->binarray  = x_array;
294b2b77a04SHong Zhang     fftw->boutarray = y_array;
295b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
296b2b77a04SHong Zhang   } else { /* use existing plan */
297b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
298b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
299b2b77a04SHong Zhang     } else {
300b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
301b2b77a04SHong Zhang     }
302b2b77a04SHong Zhang   }
303b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
304b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
305b2b77a04SHong Zhang   PetscFunctionReturn(0);
306b2b77a04SHong Zhang }
307b2b77a04SHong Zhang 
308b2b77a04SHong Zhang #undef __FUNCT__
309b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
310b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
311b2b77a04SHong Zhang {
312b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
313b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
314b2b77a04SHong Zhang   PetscErrorCode ierr;
315b2b77a04SHong Zhang 
316b2b77a04SHong Zhang   PetscFunctionBegin;
317*b223cc97SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
318*b223cc97SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
319*b223cc97SAmlan Barua //#endif
320b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
321b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
322b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
323bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
324b2b77a04SHong Zhang   PetscFunctionReturn(0);
325b2b77a04SHong Zhang }
326b2b77a04SHong Zhang 
327c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
328b2b77a04SHong Zhang #undef __FUNCT__
329b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
330b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
331b2b77a04SHong Zhang {
332b2b77a04SHong Zhang   PetscErrorCode  ierr;
333b2b77a04SHong Zhang   PetscScalar     *array;
334b2b77a04SHong Zhang 
335b2b77a04SHong Zhang   PetscFunctionBegin;
336b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
337b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
338b2b77a04SHong Zhang #endif
339b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
340b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
341b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
342b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
343b2b77a04SHong Zhang   PetscFunctionReturn(0);
344b2b77a04SHong Zhang }
345b2b77a04SHong Zhang 
346b2b77a04SHong Zhang #undef __FUNCT__
3473c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
348c92418ccSAmlan Barua /*
349c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
350c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
351c92418ccSAmlan Barua 
352c92418ccSAmlan Barua */
353c92418ccSAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
354c92418ccSAmlan Barua {
355c92418ccSAmlan Barua   PetscErrorCode ierr;
356c92418ccSAmlan Barua   PetscMPIInt    size,rank;
357c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
358c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
359c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
360c92418ccSAmlan Barua   PetscInt       N=fft->N;
361c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
362c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
363c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
364c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
365c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
366c92418ccSAmlan Barua   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
367c92418ccSAmlan Barua 
368c92418ccSAmlan Barua   PetscFunctionBegin;
369c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
370c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
371c92418ccSAmlan Barua #endif
372c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
373c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
374c92418ccSAmlan Barua   if (size == 1){
375c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
376c92418ccSAmlan Barua   }
377c92418ccSAmlan Barua   else {
378c92418ccSAmlan Barua       if (ndim>1){
379c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
380c92418ccSAmlan Barua       else {
381c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
382c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
383c92418ccSAmlan Barua           if (fin) {
384c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
385c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
386c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
387c92418ccSAmlan Barua           }
388c92418ccSAmlan Barua           if (fout) {
389c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
390c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
391c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
392c92418ccSAmlan Barua           }
393c92418ccSAmlan Barua           if (bin) {
394c92418ccSAmlan Barua             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
395c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
396c92418ccSAmlan Barua             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
397c92418ccSAmlan Barua           }
398c92418ccSAmlan Barua           if (bout) {
399c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
400c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
401c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
402c92418ccSAmlan Barua           }
403c92418ccSAmlan Barua   }
404c92418ccSAmlan Barua   if (fin){
405c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
406c92418ccSAmlan Barua   }
407c92418ccSAmlan Barua   if (fout){
408c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
409c92418ccSAmlan Barua   }
410c92418ccSAmlan Barua   if (bin){
411c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
412c92418ccSAmlan Barua   }
413c92418ccSAmlan Barua   if (bout){
414c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
415c92418ccSAmlan Barua   }
416c92418ccSAmlan Barua   PetscFunctionReturn(0);
417c92418ccSAmlan Barua }
418c92418ccSAmlan Barua 
419c92418ccSAmlan Barua 
420c92418ccSAmlan Barua }
4213c3a9c75SAmlan Barua 
422c92418ccSAmlan Barua #undef __FUNCT__
423b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
424b2b77a04SHong Zhang /*
425b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
426b2b77a04SHong Zhang      parallel layout determined by FFTW
427b2b77a04SHong Zhang 
428b2b77a04SHong Zhang    Collective on Mat
429b2b77a04SHong Zhang 
430b2b77a04SHong Zhang    Input Parameter:
431b2b77a04SHong Zhang .  mat - the matrix
432b2b77a04SHong Zhang 
433b2b77a04SHong Zhang    Output Parameter:
434b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
435b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
436b2b77a04SHong Zhang 
437b2b77a04SHong Zhang   Level: advanced
438b2b77a04SHong Zhang 
439b2b77a04SHong Zhang .seealso: MatCreateFFTW()
440b2b77a04SHong Zhang */
441b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
442b2b77a04SHong Zhang {
443b2b77a04SHong Zhang   PetscErrorCode ierr;
444b2b77a04SHong Zhang   PetscMPIInt    size,rank;
445b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
446b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
447c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4483c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
449e81bb599SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
450b2b77a04SHong Zhang 
451b2b77a04SHong Zhang   PetscFunctionBegin;
4523c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
4533c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
4543c3a9c75SAmlan Barua //#endif
455b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
456b2b77a04SHong Zhang   PetscValidType(A,1);
457b2b77a04SHong Zhang 
458b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
459b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
460b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
461e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
462b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
463b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
464e5d7f247SAmlan Barua #else
465e81bb599SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
466e81bb599SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
4673c3a9c75SAmlan Barua     printf("The code successfully comes at the end of the routine with one processor\n");
468e81bb599SAmlan Barua #endif
469b2b77a04SHong Zhang   } else {        /* mpi case */
470b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4716882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
472b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
47351d1eed7SAmlan Barua     double         *data_finr ;
474b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
475c92418ccSAmlan Barua //    PetscInt ctr;
476c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
477c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
478c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
47911902ff2SHong Zhang 
480c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
481f76f76beSAmlan Barua //        {k
482c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
483c92418ccSAmlan Barua //       }
484b2b77a04SHong Zhang 
485f76f76beSAmlan Barua 
486f76f76beSAmlan Barua 
487b2b77a04SHong Zhang     switch (ndim){
488b2b77a04SHong Zhang     case 1:
4896882372aSHong Zhang       /* Get local size */
490e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
491c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4926882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4936882372aSHong Zhang       if (fin) {
4946882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4956882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4966882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4976882372aSHong Zhang       }
4986882372aSHong Zhang       if (fout) {
4996882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5006882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5016882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5026882372aSHong Zhang       }
503b2b77a04SHong Zhang       break;
504b2b77a04SHong Zhang     case 2:
5053c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5063c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5073c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5083c3a9c75SAmlan Barua       if (fin) {
5093c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
51054dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5113c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
51209dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
5133c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5143c3a9c75SAmlan Barua       }
5153c3a9c75SAmlan Barua       if (fout) {
51609dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
51709dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5183c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5193c3a9c75SAmlan Barua       }
520f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
5213c3a9c75SAmlan Barua 
5223c3a9c75SAmlan Barua #else
523b2b77a04SHong Zhang       /* Get local size */
52409dd8a53SAmlan Barua      printf("Hope this does not come here");
525b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
526b2b77a04SHong Zhang       if (fin) {
527b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
529b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
530b2b77a04SHong Zhang       }
531b2b77a04SHong Zhang       if (fout) {
532b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
534b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
535b2b77a04SHong Zhang       }
5363c3a9c75SAmlan Barua      printf("Hope this does not come here");
5373c3a9c75SAmlan Barua #endif
538b2b77a04SHong Zhang       break;
539b2b77a04SHong Zhang     case 3:
5406882372aSHong Zhang       /* Get local size */
5413c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
54251d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
54351d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
54451d1eed7SAmlan Barua       if (fin) {
54551d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
54651d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
54751d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
54851d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
54951d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
55051d1eed7SAmlan Barua       }
55151d1eed7SAmlan Barua       if (fout) {
55251d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
55351d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55451d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55551d1eed7SAmlan Barua       }
55651d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
55751d1eed7SAmlan Barua 
55851d1eed7SAmlan Barua 
5593c3a9c75SAmlan Barua #else
5606882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
56111902ff2SHong Zhang //      printf("The quantity n is %d",n);
5626882372aSHong Zhang       if (fin) {
5636882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5646882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5656882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5666882372aSHong Zhang       }
5676882372aSHong Zhang       if (fout) {
5686882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5696882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5706882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5716882372aSHong Zhang       }
5723c3a9c75SAmlan Barua #endif
573b2b77a04SHong Zhang       break;
574b2b77a04SHong Zhang     default:
5756882372aSHong Zhang       /* Get local size */
5763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
577b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
578b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
579b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
580b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
581b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
582b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
583b3a17365SAmlan Barua       if (fin) {
584b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
585b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
586b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
587b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
588b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
589b3a17365SAmlan Barua       }
590b3a17365SAmlan Barua       if (fout) {
591b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
592b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
593b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
594b3a17365SAmlan Barua       }
595b3a17365SAmlan Barua 
5963c3a9c75SAmlan Barua #else
597c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
59811902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
59911902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
60011902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
60111902ff2SHong Zhang //      for(i=0;i<ndim;i++)
60211902ff2SHong Zhang //         {
60311902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
60411902ff2SHong Zhang //         }
60511902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
60611902ff2SHong Zhang //      printf("The quantity n is %d",n);
6076882372aSHong Zhang       if (fin) {
6086882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6096882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6106882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6116882372aSHong Zhang       }
6126882372aSHong Zhang       if (fout) {
6136882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6146882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6156882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6166882372aSHong Zhang       }
6173c3a9c75SAmlan Barua #endif
618b2b77a04SHong Zhang       break;
619b2b77a04SHong Zhang     }
620b2b77a04SHong Zhang   }
62154dd5118SAmlan Barua //  if (fin){
62254dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
62354dd5118SAmlan Barua //  }
62454dd5118SAmlan Barua //  if (fout){
62554dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
62654dd5118SAmlan Barua //  }
627b2b77a04SHong Zhang   PetscFunctionReturn(0);
628b2b77a04SHong Zhang }
629b2b77a04SHong Zhang 
630b2b77a04SHong Zhang #undef __FUNCT__
6313c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
6323c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
6333c3a9c75SAmlan Barua {
6343c3a9c75SAmlan Barua   PetscErrorCode ierr;
6353c3a9c75SAmlan Barua   PetscFunctionBegin;
6363c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6373c3a9c75SAmlan Barua   PetscFunctionReturn(0);
6383c3a9c75SAmlan Barua }
63954efbe56SHong Zhang 
640b2b77a04SHong Zhang /*
6413c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
6423c3a9c75SAmlan Barua   Input A, x, y
6433c3a9c75SAmlan Barua   A - FFTW matrix
64454dd5118SAmlan Barua   x - user data
645b2b77a04SHong Zhang   Options Database Keys:
646b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
647b2b77a04SHong Zhang 
648b2b77a04SHong Zhang    Level: intermediate
649b2b77a04SHong Zhang 
650b2b77a04SHong Zhang */
6513c3a9c75SAmlan Barua 
6523c3a9c75SAmlan Barua EXTERN_C_BEGIN
6533c3a9c75SAmlan Barua #undef __FUNCT__
6543c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
6553c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
6563c3a9c75SAmlan Barua {
6573c3a9c75SAmlan Barua   PetscErrorCode ierr;
6583c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6593c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6603c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6613c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
662*b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
663*b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
664e5d7f247SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
6653c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
666e5d7f247SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
6673c3a9c75SAmlan Barua   VecScatter     vecscat;
6683c3a9c75SAmlan Barua   IS             list1,list2;
6693c3a9c75SAmlan Barua 
670f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
671f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6723c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
673f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
6743c3a9c75SAmlan Barua 
675e81bb599SAmlan Barua   if (size==1)
676e81bb599SAmlan Barua     {
677e81bb599SAmlan Barua      switch (ndim){
678e81bb599SAmlan Barua      case 1:
679e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
680e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
681e81bb599SAmlan Barua              {
682e81bb599SAmlan Barua               indx1[i] = i;
683e81bb599SAmlan Barua              }
684e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
685e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
686e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
687e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
688e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
689*b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
690*b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
691e81bb599SAmlan Barua      break;
692e81bb599SAmlan Barua 
693e81bb599SAmlan Barua      case 2:
694e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
695e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
696e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
697e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
698e81bb599SAmlan Barua              }
699e81bb599SAmlan Barua           }
700e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
701e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
702e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
703e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
704e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
705*b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
706*b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
707e81bb599SAmlan Barua           break;
708e81bb599SAmlan Barua      case 3:
709e81bb599SAmlan Barua           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
710e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
711e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
712e81bb599SAmlan Barua                 for (k=0;k<dim[2];k++){
713e81bb599SAmlan Barua                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
714e81bb599SAmlan Barua                 }
715e81bb599SAmlan Barua              }
716e81bb599SAmlan Barua           }
717e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
718e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
719e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
722*b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
723*b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
724e81bb599SAmlan Barua           break;
725e81bb599SAmlan Barua      default:
7266971673cSAmlan Barua           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
7276971673cSAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7286971673cSAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7296971673cSAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7306971673cSAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
731*b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
7326971673cSAmlan Barua           //ierr = ISDestroy(list1);CHKERRQ(ierr);
733e81bb599SAmlan Barua           break;
734e81bb599SAmlan Barua       }
735e81bb599SAmlan Barua     }
736e81bb599SAmlan Barua 
737e81bb599SAmlan Barua  else{
738e81bb599SAmlan Barua 
7393c3a9c75SAmlan Barua  switch (ndim){
7403c3a9c75SAmlan Barua  case 1:
741e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
7423c3a9c75SAmlan Barua   break;
7433c3a9c75SAmlan Barua  case 2:
7443c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
7453c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
7463c3a9c75SAmlan Barua 
7475b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
7485b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
7495b3e41ffSAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
7503c3a9c75SAmlan Barua 
7513c3a9c75SAmlan Barua       if (dim[1]%2==0)
7523c3a9c75SAmlan Barua         NM = dim[1]+2;
7533c3a9c75SAmlan Barua       else
7543c3a9c75SAmlan Barua         NM = dim[1]+1;
7553c3a9c75SAmlan Barua 
7563c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
7573c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
7583c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
7593c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
7605b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
7613c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
7625b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
7635b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
7645b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
7655b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
7663c3a9c75SAmlan Barua         }
7673c3a9c75SAmlan Barua      }
7683c3a9c75SAmlan Barua 
7693c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7703c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7713c3a9c75SAmlan Barua 
772f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
773f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
774f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
776*b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
777*b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
778*b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
779*b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
7803c3a9c75SAmlan Barua       break;
7813c3a9c75SAmlan Barua 
7823c3a9c75SAmlan Barua  case 3:
78351d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
78451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
78551d1eed7SAmlan Barua 
78651d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
78751d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
78851d1eed7SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
78951d1eed7SAmlan Barua 
79051d1eed7SAmlan Barua       if (dim[2]%2==0)
79151d1eed7SAmlan Barua         NM = dim[2]+2;
79251d1eed7SAmlan Barua       else
79351d1eed7SAmlan Barua         NM = dim[2]+1;
79451d1eed7SAmlan Barua 
79551d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
79651d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
79751d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
79851d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
79951d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
80051d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
80151d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
80251d1eed7SAmlan Barua             }
80351d1eed7SAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
80451d1eed7SAmlan Barua            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
80551d1eed7SAmlan Barua            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
80651d1eed7SAmlan Barua            // printf("-------------------------\n",indx2[tempindx],rank);
80751d1eed7SAmlan Barua         }
80851d1eed7SAmlan Barua      }
80951d1eed7SAmlan Barua 
81051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
81151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
81251d1eed7SAmlan Barua 
81351d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
81451d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81551d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81651d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
817*b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
818*b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
819*b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
820*b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
8213c3a9c75SAmlan Barua       break;
8223c3a9c75SAmlan Barua 
8233c3a9c75SAmlan Barua  default:
824e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
825e5d7f247SAmlan Barua       printf("The value of temp is %ld\n",temp);
826e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
827e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
828e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
829e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
830e5d7f247SAmlan Barua 
831e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
832e5d7f247SAmlan Barua       printf("The value of partial dim is %d\n",partial_dim);
833e5d7f247SAmlan Barua 
834e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
835e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
836e5d7f247SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
837e5d7f247SAmlan Barua 
838e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
839ba6e06d0SAmlan Barua         NM = 2;
840e5d7f247SAmlan Barua       else
841ba6e06d0SAmlan Barua         NM = 1;
842e5d7f247SAmlan Barua 
8436971673cSAmlan Barua       j = low;
8446971673cSAmlan Barua       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
8456971673cSAmlan Barua          {
8466971673cSAmlan Barua           indx1[i] = local_0_start*partial_dim + i;
8476971673cSAmlan Barua           indx2[i] = j;
848ba6e06d0SAmlan Barua           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
8496971673cSAmlan Barua           if (k%dim[ndim-1]==0)
8506971673cSAmlan Barua             { j+=NM;}
8516971673cSAmlan Barua           j++;
8526971673cSAmlan Barua          }
8536971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8546971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8556971673cSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8566971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8576971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8586971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
859*b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
860*b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
861*b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
862*b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
8633c3a9c75SAmlan Barua       break;
8643c3a9c75SAmlan Barua   }
865e81bb599SAmlan Barua  }
8663c3a9c75SAmlan Barua 
8673c3a9c75SAmlan Barua  return 0;
8683c3a9c75SAmlan Barua }
8693c3a9c75SAmlan Barua EXTERN_C_END
8703c3a9c75SAmlan Barua 
8713c3a9c75SAmlan Barua #undef __FUNCT__
8723c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
8733c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
8743c3a9c75SAmlan Barua {
8753c3a9c75SAmlan Barua   PetscErrorCode ierr;
8763c3a9c75SAmlan Barua   PetscFunctionBegin;
8773c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
8783c3a9c75SAmlan Barua   PetscFunctionReturn(0);
8793c3a9c75SAmlan Barua }
88054efbe56SHong Zhang 
8815b3e41ffSAmlan Barua /*
8825b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
8835b3e41ffSAmlan Barua   Input A, x, y
8845b3e41ffSAmlan Barua   A - FFTW matrix
8855b3e41ffSAmlan Barua   x - FFTW vector
8865b3e41ffSAmlan Barua   y - PETSc vector that user can read
8875b3e41ffSAmlan Barua   Options Database Keys:
8885b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
8895b3e41ffSAmlan Barua 
8905b3e41ffSAmlan Barua    Level: intermediate
8915b3e41ffSAmlan Barua 
8923c3a9c75SAmlan Barua */
8933c3a9c75SAmlan Barua 
8943c3a9c75SAmlan Barua EXTERN_C_BEGIN
8953c3a9c75SAmlan Barua #undef __FUNCT__
8965b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
8975b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
8985b3e41ffSAmlan Barua {
8995b3e41ffSAmlan Barua   PetscErrorCode ierr;
9005b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
9015b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
9025b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9035b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
904*b223cc97SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
905*b223cc97SAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
906ba6e06d0SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
9075b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
908ba6e06d0SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
9095b3e41ffSAmlan Barua   VecScatter     vecscat;
9105b3e41ffSAmlan Barua   IS             list1,list2;
9115b3e41ffSAmlan Barua 
9125b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9135b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
914cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
9155b3e41ffSAmlan Barua   printf("Local ownership starts at %d\n",low);
9165b3e41ffSAmlan Barua 
917e81bb599SAmlan Barua   if (size==1){
9185b3e41ffSAmlan Barua     switch (ndim){
9195b3e41ffSAmlan Barua     case 1:
920e81bb599SAmlan Barua            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
921e81bb599SAmlan Barua           for (i=0;i<dim[0];i++)
922e81bb599SAmlan Barua              {
923e81bb599SAmlan Barua               indx1[i] = i;
924e81bb599SAmlan Barua              }
925e81bb599SAmlan Barua           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
926e81bb599SAmlan Barua           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
927e81bb599SAmlan Barua           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928e81bb599SAmlan Barua           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929e81bb599SAmlan Barua           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
930*b223cc97SAmlan Barua           ierr = ISDestroy(&list1);CHKERRQ(ierr);
931*b223cc97SAmlan Barua           ierr = PetscFree(indx1);CHKERRQ(ierr);
932e81bb599SAmlan Barua           break;
933e81bb599SAmlan Barua 
934e81bb599SAmlan Barua     case 2:
935e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
936e81bb599SAmlan Barua           for (i=0;i<dim[0];i++){
937e81bb599SAmlan Barua              for (j=0;j<dim[1];j++){
938e81bb599SAmlan Barua                 indx1[i*dim[1]+j] = i*dim[1] + j;
939e81bb599SAmlan Barua              }
940e81bb599SAmlan Barua           }
941e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
942e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
943e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
946*b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
947*b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
948e81bb599SAmlan Barua          break;
949e81bb599SAmlan Barua     case 3:
950e81bb599SAmlan Barua          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
951e81bb599SAmlan Barua          for (i=0;i<dim[0];i++){
952e81bb599SAmlan Barua             for (j=0;j<dim[1];j++){
953e81bb599SAmlan Barua                for (k=0;k<dim[2];k++){
954e81bb599SAmlan Barua                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
955e81bb599SAmlan Barua                }
956e81bb599SAmlan Barua             }
957e81bb599SAmlan Barua          }
958e81bb599SAmlan Barua          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
959e81bb599SAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
960e81bb599SAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961e81bb599SAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962e81bb599SAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
963*b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
964*b223cc97SAmlan Barua          ierr = PetscFree(indx1);CHKERRQ(ierr);
965e81bb599SAmlan Barua          break;
966e81bb599SAmlan Barua     default:
9676971673cSAmlan Barua          ierr = ISCreateStride(comm,N,0,1,&list1);
9686971673cSAmlan Barua          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
9696971673cSAmlan Barua          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9706971673cSAmlan Barua          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9716971673cSAmlan Barua          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9726971673cSAmlan Barua          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
973*b223cc97SAmlan Barua          ierr = ISDestroy(&list1);CHKERRQ(ierr);
974e81bb599SAmlan Barua          break;
975e81bb599SAmlan Barua     }
976e81bb599SAmlan Barua   }
977e81bb599SAmlan Barua   else{
978e81bb599SAmlan Barua 
979e81bb599SAmlan Barua  switch (ndim){
980e81bb599SAmlan Barua  case 1:
981e81bb599SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"No support yet");
9825b3e41ffSAmlan Barua   break;
9835b3e41ffSAmlan Barua  case 2:
9845b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
9855b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
9865b3e41ffSAmlan Barua 
9875b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
9885b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
9895b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
9905b3e41ffSAmlan Barua 
9915b3e41ffSAmlan Barua      if (dim[1]%2==0)
9925b3e41ffSAmlan Barua       NM = dim[1]+2;
9935b3e41ffSAmlan Barua     else
9945b3e41ffSAmlan Barua       NM = dim[1]+1;
9955b3e41ffSAmlan Barua 
9965b3e41ffSAmlan Barua 
9975b3e41ffSAmlan Barua 
9985b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
9995b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
10005b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
10015b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
10025b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
10035b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
1004cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
1005cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1006cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1007cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
10085b3e41ffSAmlan Barua         }
10095b3e41ffSAmlan Barua      }
10105b3e41ffSAmlan Barua 
10115b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10125b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10135b3e41ffSAmlan Barua 
10145b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10155b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10165b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10175b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1018*b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1019*b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1020*b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1021*b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
10225b3e41ffSAmlan Barua   break;
10235b3e41ffSAmlan Barua 
10245b3e41ffSAmlan Barua  case 3:
102551d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
102651d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
102751d1eed7SAmlan Barua 
102851d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
102951d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
103051d1eed7SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
103151d1eed7SAmlan Barua 
103251d1eed7SAmlan Barua      if (dim[2]%2==0)
103351d1eed7SAmlan Barua       NM = dim[2]+2;
103451d1eed7SAmlan Barua     else
103551d1eed7SAmlan Barua       NM = dim[2]+1;
103651d1eed7SAmlan Barua 
103751d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
103851d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
103951d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
104051d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
104151d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
104251d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
104351d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
104451d1eed7SAmlan Barua            }
104551d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
104651d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
104751d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
104851d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
104951d1eed7SAmlan Barua         }
105051d1eed7SAmlan Barua      }
105151d1eed7SAmlan Barua 
105251d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
105351d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
105451d1eed7SAmlan Barua 
105551d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
105651d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105751d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
105851d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1059*b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1060*b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1061*b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1062*b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
10635b3e41ffSAmlan Barua   break;
10645b3e41ffSAmlan Barua 
10655b3e41ffSAmlan Barua   default:
1066ba6e06d0SAmlan Barua      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1067ba6e06d0SAmlan Barua      printf("The value of temp is %ld\n",temp);
1068ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1069ba6e06d0SAmlan Barua      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1070ba6e06d0SAmlan Barua      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1071ba6e06d0SAmlan Barua      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1072ba6e06d0SAmlan Barua 
1073ba6e06d0SAmlan Barua      partial_dim = fftw->partial_dim;
1074ba6e06d0SAmlan Barua      printf("The value of partial dim is %d\n",partial_dim);
1075ba6e06d0SAmlan Barua 
1076ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1077ba6e06d0SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1078ba6e06d0SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
1079ba6e06d0SAmlan Barua 
1080ba6e06d0SAmlan Barua      if (dim[ndim-1]%2==0)
1081ba6e06d0SAmlan Barua        NM = 2;
1082ba6e06d0SAmlan Barua      else
1083ba6e06d0SAmlan Barua        NM = 1;
1084ba6e06d0SAmlan Barua 
1085ba6e06d0SAmlan Barua      j = low;
1086ba6e06d0SAmlan Barua      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1087ba6e06d0SAmlan Barua         {
1088ba6e06d0SAmlan Barua          indx1[i] = local_0_start*partial_dim + i;
1089ba6e06d0SAmlan Barua          indx2[i] = j;
1090ba6e06d0SAmlan Barua          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1091ba6e06d0SAmlan Barua          if (k%dim[ndim-1]==0)
1092ba6e06d0SAmlan Barua            { j+=NM;}
1093ba6e06d0SAmlan Barua          j++;
1094ba6e06d0SAmlan Barua         }
1095ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1096ba6e06d0SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1097ba6e06d0SAmlan Barua 
1098ba6e06d0SAmlan Barua       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1099ba6e06d0SAmlan Barua 
1100ba6e06d0SAmlan Barua 
1101ba6e06d0SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1102ba6e06d0SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1103ba6e06d0SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1104ba6e06d0SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1105*b223cc97SAmlan Barua      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1106*b223cc97SAmlan Barua      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1107*b223cc97SAmlan Barua      ierr = PetscFree(indx1);CHKERRQ(ierr);
1108*b223cc97SAmlan Barua      ierr = PetscFree(indx2);CHKERRQ(ierr);
1109ba6e06d0SAmlan Barua 
11105b3e41ffSAmlan Barua   break;
11115b3e41ffSAmlan Barua  }
1112e81bb599SAmlan Barua  }
11135b3e41ffSAmlan Barua  return 0;
11145b3e41ffSAmlan Barua }
11155b3e41ffSAmlan Barua EXTERN_C_END
11165b3e41ffSAmlan Barua 
11175b3e41ffSAmlan Barua EXTERN_C_BEGIN
11185b3e41ffSAmlan Barua #undef __FUNCT__
11193c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
11203c3a9c75SAmlan Barua /*
11213c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
11223c3a9c75SAmlan Barua   via the external package FFTW
11233c3a9c75SAmlan Barua   Options Database Keys:
11243c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11253c3a9c75SAmlan Barua 
11263c3a9c75SAmlan Barua    Level: intermediate
11273c3a9c75SAmlan Barua 
11283c3a9c75SAmlan Barua */
11293c3a9c75SAmlan Barua 
1130b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
1131b2b77a04SHong Zhang {
1132b2b77a04SHong Zhang   PetscErrorCode ierr;
1133b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
1134b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1135b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1136b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1137b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1138b2b77a04SHong Zhang   PetscBool      flg;
1139b3a17365SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,N1;
114011902ff2SHong Zhang   PetscMPIInt    size,rank;
1141b3a17365SAmlan Barua   ptrdiff_t      *pdim, temp;
11425b3e41ffSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
1143b2b77a04SHong Zhang 
1144b2b77a04SHong Zhang   PetscFunctionBegin;
11453c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
11463c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
11473c3a9c75SAmlan Barua //#endif
1148b2b77a04SHong Zhang 
1149b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
115011902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
115154dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1152c92418ccSAmlan Barua 
115311902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
115411902ff2SHong Zhang   pdim[0] = dim[0];
115511902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
115611902ff2SHong Zhang       {
11576882372aSHong Zhang           partial_dim *= dim[ctr];
115811902ff2SHong Zhang           pdim[ctr] = dim[ctr];
11596882372aSHong Zhang       }
11603c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
11613c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
11623c3a9c75SAmlan Barua //#endif
11633c3a9c75SAmlan Barua 
116411902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
1165b2b77a04SHong Zhang   if (size == 1) {
1166e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1167b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1168b2b77a04SHong Zhang     n = N;
1169e81bb599SAmlan Barua #else
1170e81bb599SAmlan Barua     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1171e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1172e81bb599SAmlan Barua     n = tot_dim;
1173e81bb599SAmlan Barua #endif
1174e81bb599SAmlan Barua 
1175b2b77a04SHong Zhang   } else {
1176*b223cc97SAmlan Barua     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1177b2b77a04SHong Zhang     switch (ndim){
1178b2b77a04SHong Zhang     case 1:
11793c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11803c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1181e5d7f247SAmlan Barua #else
11826882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
11836882372aSHong Zhang       n = (PetscInt)local_n0;
11846882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1185e5d7f247SAmlan Barua #endif
11863c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1187b2b77a04SHong Zhang       break;
1188b2b77a04SHong Zhang     case 2:
11895b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
1190b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1191b2b77a04SHong Zhang       /*
1192b2b77a04SHong Zhang        PetscMPIInt    rank;
1193b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1194b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
1195b2b77a04SHong Zhang        */
1196b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
1197b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11985b3e41ffSAmlan Barua #else
11995b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
12005b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
12015b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
12025b3e41ffSAmlan Barua #endif
1203b2b77a04SHong Zhang       break;
1204b2b77a04SHong Zhang     case 3:
120511902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
120651d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
120751d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
12086882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
12096882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
121051d1eed7SAmlan Barua #else
121151d1eed7SAmlan Barua       printf("The code comes here\n");
121251d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
121351d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
121451d1eed7SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
121551d1eed7SAmlan Barua #endif
1216b2b77a04SHong Zhang       break;
1217b2b77a04SHong Zhang     default:
1218b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
121911902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1220c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
122111902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
12226882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
122311902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
12246882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1225b3a17365SAmlan Barua #else
1226b3a17365SAmlan Barua       temp = pdim[ndim-1];
1227b3a17365SAmlan Barua       pdim[ndim-1]= temp/2 + 1;
1228b3a17365SAmlan Barua       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1229b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1230b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1231b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1232b3a17365SAmlan Barua       pdim[ndim-1] = temp;
1233b3a17365SAmlan Barua       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1234b3a17365SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1235b3a17365SAmlan Barua #endif
1236b2b77a04SHong Zhang       break;
1237b2b77a04SHong Zhang     }
1238b2b77a04SHong Zhang   }
1239b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1240b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1241b2b77a04SHong Zhang   fft->data = (void*)fftw;
1242b2b77a04SHong Zhang 
1243b2b77a04SHong Zhang   fft->n           = n;
1244c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1245e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1246c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1247b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1248c92418ccSAmlan Barua 
1249b2b77a04SHong Zhang   fftw->p_forward  = 0;
1250b2b77a04SHong Zhang   fftw->p_backward = 0;
1251b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1252b2b77a04SHong Zhang 
1253b2b77a04SHong Zhang   if (size == 1){
1254b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1255b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1256b2b77a04SHong Zhang   } else {
1257b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1258b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1259b2b77a04SHong Zhang   }
1260b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
1261b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
1262b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
12633c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12643c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
12655b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
12663c3a9c75SAmlan Barua #endif
1267b2b77a04SHong Zhang 
1268b2b77a04SHong Zhang   /* get runtime options */
1269b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1270b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1271b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1272b2b77a04SHong Zhang   PetscOptionsEnd();
1273b2b77a04SHong Zhang   PetscFunctionReturn(0);
1274b2b77a04SHong Zhang }
1275b2b77a04SHong Zhang EXTERN_C_END
12763c3a9c75SAmlan Barua 
12773c3a9c75SAmlan Barua 
12783c3a9c75SAmlan Barua 
12793c3a9c75SAmlan Barua 
1280