xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision e5d7f247475a58b54eed84ab1606c36769b8e4b7)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t   ndim_fftw,*dim_fftw;
14*e5d7f247SAmlan Barua   PetscInt   partial_dim;
15b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
16b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
19b2b77a04SHong Zhang } Mat_FFTW;
20b2b77a04SHong Zhang 
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang #undef __FUNCT__
30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32b2b77a04SHong Zhang {
33b2b77a04SHong Zhang   PetscErrorCode ierr;
34b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
37b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38b2b77a04SHong Zhang 
39b2b77a04SHong Zhang   PetscFunctionBegin;
40b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
41b9ae062cSHong Zhang 
42b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
43b2b77a04SHong Zhang #endif
44b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
45b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
46b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
47b2b77a04SHong Zhang     switch (ndim){
48b2b77a04SHong Zhang     case 1:
49b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 2:
52b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
53b2b77a04SHong Zhang       break;
54b2b77a04SHong Zhang     case 3:
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56b2b77a04SHong Zhang       break;
57b2b77a04SHong Zhang     default:
58b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
59b2b77a04SHong Zhang       break;
60b2b77a04SHong Zhang     }
61b2b77a04SHong Zhang     fftw->finarray  = x_array;
62b2b77a04SHong Zhang     fftw->foutarray = y_array;
63b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
64b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
65b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
66b2b77a04SHong Zhang   } else { /* use existing plan */
67b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
68b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
69b2b77a04SHong Zhang     } else {
70b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
71b2b77a04SHong Zhang     }
72b2b77a04SHong Zhang   }
73b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
74b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
75b2b77a04SHong Zhang   PetscFunctionReturn(0);
76b2b77a04SHong Zhang }
77b2b77a04SHong Zhang 
78b2b77a04SHong Zhang #undef __FUNCT__
79b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
80b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
81b2b77a04SHong Zhang {
82b2b77a04SHong Zhang   PetscErrorCode ierr;
83b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
84b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
85b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
86b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
87b2b77a04SHong Zhang 
88b2b77a04SHong Zhang   PetscFunctionBegin;
89b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
90b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
91b2b77a04SHong Zhang #endif
92b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
93b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
94b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
95b2b77a04SHong Zhang     switch (ndim){
96b2b77a04SHong Zhang     case 1:
97b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
98b2b77a04SHong Zhang       break;
99b2b77a04SHong Zhang     case 2:
100b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
101b2b77a04SHong Zhang       break;
102b2b77a04SHong Zhang     case 3:
103b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
104b2b77a04SHong Zhang       break;
105b2b77a04SHong Zhang     default:
106b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
107b2b77a04SHong Zhang       break;
108b2b77a04SHong Zhang     }
109b2b77a04SHong Zhang     fftw->binarray  = x_array;
110b2b77a04SHong Zhang     fftw->boutarray = y_array;
111b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
112b2b77a04SHong Zhang   } else { /* use existing plan */
113b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
114b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
115b2b77a04SHong Zhang     } else {
116b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
117b2b77a04SHong Zhang     }
118b2b77a04SHong Zhang   }
119b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
120b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
121b2b77a04SHong Zhang   PetscFunctionReturn(0);
122b2b77a04SHong Zhang }
123b2b77a04SHong Zhang 
124b2b77a04SHong Zhang #undef __FUNCT__
125b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
126b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
127b2b77a04SHong Zhang {
128b2b77a04SHong Zhang   PetscErrorCode ierr;
129b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
130b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
131b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
132c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
133b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
134c92418ccSAmlan Barua // PetscInt ctr;
135c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
136c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
137c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
138c92418ccSAmlan Barua 
139c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
140c92418ccSAmlan Barua //     {
141c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
142c92418ccSAmlan Barua //     }
143b2b77a04SHong Zhang 
144b2b77a04SHong Zhang   PetscFunctionBegin;
1455b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
1465b3e41ffSAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
1475b3e41ffSAmlan Barua //#endif
148c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
149c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
15011902ff2SHong Zhang 
151b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
152b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
153b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
154b2b77a04SHong Zhang     switch (ndim){
155b2b77a04SHong Zhang     case 1:
1563c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
157b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1583c3a9c75SAmlan Barua #endif
159b2b77a04SHong Zhang       break;
160b2b77a04SHong Zhang     case 2:
1613c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
162b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1633c3a9c75SAmlan Barua #else
1645b3e41ffSAmlan Barua       printf("The code comes here \n");
1653c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1663c3a9c75SAmlan Barua #endif
167b2b77a04SHong Zhang       break;
168b2b77a04SHong Zhang     case 3:
1693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
170b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1713c3a9c75SAmlan Barua #else
17251d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1733c3a9c75SAmlan Barua #endif
174b2b77a04SHong Zhang       break;
175b2b77a04SHong Zhang     default:
1763c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
177c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
1783c3a9c75SAmlan Barua #else
1793c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
1803c3a9c75SAmlan Barua #endif
18111902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182b2b77a04SHong Zhang       break;
183b2b77a04SHong Zhang     }
184b2b77a04SHong Zhang     fftw->finarray  = x_array;
185b2b77a04SHong Zhang     fftw->foutarray = y_array;
186b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
187b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
188b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
189b2b77a04SHong Zhang   } else { /* use existing plan */
190b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
191b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
192b2b77a04SHong Zhang     } else {
193b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
194b2b77a04SHong Zhang     }
195b2b77a04SHong Zhang   }
196b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
197b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
198b2b77a04SHong Zhang   PetscFunctionReturn(0);
199b2b77a04SHong Zhang }
200b2b77a04SHong Zhang 
201b2b77a04SHong Zhang #undef __FUNCT__
202b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
203b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
204b2b77a04SHong Zhang {
205b2b77a04SHong Zhang   PetscErrorCode ierr;
206b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
207b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
208b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
209c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
210b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
211c92418ccSAmlan Barua //  PetscInt       ctr;
212c92418ccSAmlan Barua //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
213c92418ccSAmlan Barua //  ndim1=(ptrdiff_t) ndim;
214c92418ccSAmlan Barua //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
215c92418ccSAmlan Barua 
216c92418ccSAmlan Barua //  for(ctr=0;ctr<ndim;ctr++)
217c92418ccSAmlan Barua //     {
218c92418ccSAmlan Barua //      pdim[ctr] = dim[ctr];
219c92418ccSAmlan Barua //     }
220b2b77a04SHong Zhang 
221b2b77a04SHong Zhang   PetscFunctionBegin;
2223c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
2233c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
2243c3a9c75SAmlan Barua //#endif
225c92418ccSAmlan Barua //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
226c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW?
227c92418ccSAmlan Barua //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
22811902ff2SHong Zhang 
229b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
230b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
231b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
232b2b77a04SHong Zhang     switch (ndim){
233b2b77a04SHong Zhang     case 1:
2343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
235b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2363c3a9c75SAmlan Barua #endif
237b2b77a04SHong Zhang       break;
238b2b77a04SHong Zhang     case 2:
2393c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
240b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2413c3a9c75SAmlan Barua #else
2423c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2433c3a9c75SAmlan Barua #endif
244b2b77a04SHong Zhang       break;
245b2b77a04SHong Zhang     case 3:
2463c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
247b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2483c3a9c75SAmlan Barua #else
2493c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2503c3a9c75SAmlan Barua #endif
251b2b77a04SHong Zhang       break;
252b2b77a04SHong Zhang     default:
2533c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
254c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
2553c3a9c75SAmlan Barua #else
2563c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
2573c3a9c75SAmlan Barua #endif
258c92418ccSAmlan Barua //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
259b2b77a04SHong Zhang       break;
260b2b77a04SHong Zhang     }
261b2b77a04SHong Zhang     fftw->binarray  = x_array;
262b2b77a04SHong Zhang     fftw->boutarray = y_array;
263b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264b2b77a04SHong Zhang   } else { /* use existing plan */
265b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
266b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
267b2b77a04SHong Zhang     } else {
268b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
269b2b77a04SHong Zhang     }
270b2b77a04SHong Zhang   }
271b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
272b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
273b2b77a04SHong Zhang   PetscFunctionReturn(0);
274b2b77a04SHong Zhang }
275b2b77a04SHong Zhang 
276b2b77a04SHong Zhang #undef __FUNCT__
277b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
278b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
279b2b77a04SHong Zhang {
280b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
281b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
282b2b77a04SHong Zhang   PetscErrorCode ierr;
283b2b77a04SHong Zhang 
284b2b77a04SHong Zhang   PetscFunctionBegin;
285b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
286b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
287b2b77a04SHong Zhang #endif
288b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
289b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
290b1301b2eSHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
291bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
292b2b77a04SHong Zhang   PetscFunctionReturn(0);
293b2b77a04SHong Zhang }
294b2b77a04SHong Zhang 
295c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
296b2b77a04SHong Zhang #undef __FUNCT__
297b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
298b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
299b2b77a04SHong Zhang {
300b2b77a04SHong Zhang   PetscErrorCode  ierr;
301b2b77a04SHong Zhang   PetscScalar     *array;
302b2b77a04SHong Zhang 
303b2b77a04SHong Zhang   PetscFunctionBegin;
304b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
305b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
306b2b77a04SHong Zhang #endif
307b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
310b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
311b2b77a04SHong Zhang   PetscFunctionReturn(0);
312b2b77a04SHong Zhang }
313b2b77a04SHong Zhang 
314b2b77a04SHong Zhang #undef __FUNCT__
3153c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW"
316c92418ccSAmlan Barua /*
317c92418ccSAmlan Barua    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
318c92418ccSAmlan Barua      parallel layout determined by FFTW-1D
319c92418ccSAmlan Barua 
320c92418ccSAmlan Barua */
321c92418ccSAmlan Barua PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
322c92418ccSAmlan Barua {
323c92418ccSAmlan Barua   PetscErrorCode ierr;
324c92418ccSAmlan Barua   PetscMPIInt    size,rank;
325c92418ccSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
326c92418ccSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
327c92418ccSAmlan Barua //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
328c92418ccSAmlan Barua   PetscInt       N=fft->N;
329c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
330c92418ccSAmlan Barua   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
331c92418ccSAmlan Barua   ptrdiff_t      f_local_n1,f_local_1_end;
332c92418ccSAmlan Barua   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
333c92418ccSAmlan Barua   ptrdiff_t      b_local_n1,b_local_1_end;
334c92418ccSAmlan Barua   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
335c92418ccSAmlan Barua 
336c92418ccSAmlan Barua   PetscFunctionBegin;
337c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
338c92418ccSAmlan Barua   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
339c92418ccSAmlan Barua #endif
340c92418ccSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
341c92418ccSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
342c92418ccSAmlan Barua   if (size == 1){
343c92418ccSAmlan Barua     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
344c92418ccSAmlan Barua   }
345c92418ccSAmlan Barua   else {
346c92418ccSAmlan Barua       if (ndim>1){
347c92418ccSAmlan Barua         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
348c92418ccSAmlan Barua       else {
349c92418ccSAmlan Barua           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
350c92418ccSAmlan Barua           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
351c92418ccSAmlan Barua           if (fin) {
352c92418ccSAmlan Barua             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
353c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
354c92418ccSAmlan Barua             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
355c92418ccSAmlan Barua           }
356c92418ccSAmlan Barua           if (fout) {
357c92418ccSAmlan Barua             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
358c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
359c92418ccSAmlan Barua             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
360c92418ccSAmlan Barua           }
361c92418ccSAmlan Barua           if (bin) {
362c92418ccSAmlan Barua             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
363c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
364c92418ccSAmlan Barua             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
365c92418ccSAmlan Barua           }
366c92418ccSAmlan Barua           if (bout) {
367c92418ccSAmlan Barua             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
368c92418ccSAmlan Barua             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
369c92418ccSAmlan Barua             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
370c92418ccSAmlan Barua           }
371c92418ccSAmlan Barua   }
372c92418ccSAmlan Barua   if (fin){
373c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
374c92418ccSAmlan Barua   }
375c92418ccSAmlan Barua   if (fout){
376c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
377c92418ccSAmlan Barua   }
378c92418ccSAmlan Barua   if (bin){
379c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
380c92418ccSAmlan Barua   }
381c92418ccSAmlan Barua   if (bout){
382c92418ccSAmlan Barua     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
383c92418ccSAmlan Barua   }
384c92418ccSAmlan Barua   PetscFunctionReturn(0);
385c92418ccSAmlan Barua }
386c92418ccSAmlan Barua 
387c92418ccSAmlan Barua 
388c92418ccSAmlan Barua }
3893c3a9c75SAmlan Barua 
390c92418ccSAmlan Barua #undef __FUNCT__
391b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
392b2b77a04SHong Zhang /*
393b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
394b2b77a04SHong Zhang      parallel layout determined by FFTW
395b2b77a04SHong Zhang 
396b2b77a04SHong Zhang    Collective on Mat
397b2b77a04SHong Zhang 
398b2b77a04SHong Zhang    Input Parameter:
399b2b77a04SHong Zhang .  mat - the matrix
400b2b77a04SHong Zhang 
401b2b77a04SHong Zhang    Output Parameter:
402b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
403b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
404b2b77a04SHong Zhang 
405b2b77a04SHong Zhang   Level: advanced
406b2b77a04SHong Zhang 
407b2b77a04SHong Zhang .seealso: MatCreateFFTW()
408b2b77a04SHong Zhang */
409b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
410b2b77a04SHong Zhang {
411b2b77a04SHong Zhang   PetscErrorCode ierr;
412b2b77a04SHong Zhang   PetscMPIInt    size,rank;
413b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
414b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
415c92418ccSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4163c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1,vsize;
417b2b77a04SHong Zhang 
418b2b77a04SHong Zhang   PetscFunctionBegin;
4193c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
4203c3a9c75SAmlan Barua //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
4213c3a9c75SAmlan Barua //#endif
422b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
423b2b77a04SHong Zhang   PetscValidType(A,1);
424b2b77a04SHong Zhang 
425b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
426b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
427b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
428*e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX)
429b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
430b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
431*e5d7f247SAmlan Barua #else
432*e5d7f247SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
433*e5d7f247SAmlan Barua     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*(N/2+1),fout);CHKERRQ(ierr);}
434*e5d7f247SAmlan Barua #endif
4353c3a9c75SAmlan Barua     printf("The code successfully comes at the end of the routine with one processor\n");
436b2b77a04SHong Zhang   } else {        /* mpi case */
437b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
4386882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
439c92418ccSAmlan Barua     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
440b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
44151d1eed7SAmlan Barua     double         *data_finr ;
442b3a17365SAmlan Barua     ptrdiff_t      local_1_start,temp;
443c92418ccSAmlan Barua //    PetscInt ctr;
444c92418ccSAmlan Barua //    ptrdiff_t      ndim1,*pdim;
445c92418ccSAmlan Barua //    ndim1=(ptrdiff_t) ndim;
446c92418ccSAmlan Barua //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
44711902ff2SHong Zhang 
448c92418ccSAmlan Barua //    for(ctr=0;ctr<ndim;ctr++)
449f76f76beSAmlan Barua //        {k
450c92418ccSAmlan Barua //           pdim[ctr] = dim[ctr];
451c92418ccSAmlan Barua //       }
452b2b77a04SHong Zhang 
453f76f76beSAmlan Barua 
454f76f76beSAmlan Barua 
455b2b77a04SHong Zhang     switch (ndim){
456b2b77a04SHong Zhang     case 1:
4576882372aSHong Zhang       /* Get local size */
458*e5d7f247SAmlan Barua       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
459c92418ccSAmlan Barua //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
4606882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4616882372aSHong Zhang       if (fin) {
4626882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4636882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
4646882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4656882372aSHong Zhang       }
4666882372aSHong Zhang       if (fout) {
4676882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
4686882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4696882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4706882372aSHong Zhang       }
471b2b77a04SHong Zhang       break;
472b2b77a04SHong Zhang     case 2:
4733c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4743c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4753c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4763c3a9c75SAmlan Barua       if (fin) {
4773c3a9c75SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
47854dd5118SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
4793c3a9c75SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
48009dd8a53SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
4813c3a9c75SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
4823c3a9c75SAmlan Barua       }
4833c3a9c75SAmlan Barua       if (fout) {
48409dd8a53SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
48509dd8a53SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
4863c3a9c75SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
4873c3a9c75SAmlan Barua       }
488f76f76beSAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
4893c3a9c75SAmlan Barua 
4903c3a9c75SAmlan Barua #else
491b2b77a04SHong Zhang       /* Get local size */
49209dd8a53SAmlan Barua      printf("Hope this does not come here");
493b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
494b2b77a04SHong Zhang       if (fin) {
495b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
496b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
497b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
498b2b77a04SHong Zhang       }
499b2b77a04SHong Zhang       if (fout) {
500b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
501b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
502b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
503b2b77a04SHong Zhang       }
5043c3a9c75SAmlan Barua      printf("Hope this does not come here");
5053c3a9c75SAmlan Barua #endif
506b2b77a04SHong Zhang       break;
507b2b77a04SHong Zhang     case 3:
5086882372aSHong Zhang       /* Get local size */
5093c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
51051d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
51151d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
51251d1eed7SAmlan Barua       if (fin) {
51351d1eed7SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
51451d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
51551d1eed7SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
51651d1eed7SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
51751d1eed7SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
51851d1eed7SAmlan Barua       }
51951d1eed7SAmlan Barua       if (fout) {
52051d1eed7SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
52151d1eed7SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52251d1eed7SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52351d1eed7SAmlan Barua       }
52451d1eed7SAmlan Barua       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
52551d1eed7SAmlan Barua 
52651d1eed7SAmlan Barua 
5273c3a9c75SAmlan Barua #else
5286882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
52911902ff2SHong Zhang //      printf("The quantity n is %d",n);
5306882372aSHong Zhang       if (fin) {
5316882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5326882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5336882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5346882372aSHong Zhang       }
5356882372aSHong Zhang       if (fout) {
5366882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5376882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5386882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5396882372aSHong Zhang       }
5403c3a9c75SAmlan Barua #endif
541b2b77a04SHong Zhang       break;
542b2b77a04SHong Zhang     default:
5436882372aSHong Zhang       /* Get local size */
5443c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
545b3a17365SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
546b3a17365SAmlan Barua       printf("The value of temp is %ld\n",temp);
547b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
548b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
549b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
550b3a17365SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
551b3a17365SAmlan Barua       if (fin) {
552b3a17365SAmlan Barua         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
553b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
554b3a17365SAmlan Barua         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
555b3a17365SAmlan Barua         //printf("The code comes here with vector size %d\n",vsize);
556b3a17365SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
557b3a17365SAmlan Barua       }
558b3a17365SAmlan Barua       if (fout) {
559b3a17365SAmlan Barua         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
560b3a17365SAmlan Barua         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
561b3a17365SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
562b3a17365SAmlan Barua       }
563b3a17365SAmlan Barua 
5643c3a9c75SAmlan Barua #else
565c92418ccSAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
56611902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
56711902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
56811902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
56911902ff2SHong Zhang //      for(i=0;i<ndim;i++)
57011902ff2SHong Zhang //         {
57111902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
57211902ff2SHong Zhang //         }
57311902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
57411902ff2SHong Zhang //      printf("The quantity n is %d",n);
5756882372aSHong Zhang       if (fin) {
5766882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5776882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5786882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5796882372aSHong Zhang       }
5806882372aSHong Zhang       if (fout) {
5816882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5826882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5836882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5846882372aSHong Zhang       }
5853c3a9c75SAmlan Barua #endif
586b2b77a04SHong Zhang       break;
587b2b77a04SHong Zhang     }
588b2b77a04SHong Zhang   }
58954dd5118SAmlan Barua //  if (fin){
59054dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
59154dd5118SAmlan Barua //  }
59254dd5118SAmlan Barua //  if (fout){
59354dd5118SAmlan Barua //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
59454dd5118SAmlan Barua //  }
595b2b77a04SHong Zhang   PetscFunctionReturn(0);
596b2b77a04SHong Zhang }
597b2b77a04SHong Zhang 
598b2b77a04SHong Zhang #undef __FUNCT__
5993c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT"
6003c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
6013c3a9c75SAmlan Barua {
6023c3a9c75SAmlan Barua   PetscErrorCode ierr;
6033c3a9c75SAmlan Barua   PetscFunctionBegin;
6043c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6053c3a9c75SAmlan Barua   PetscFunctionReturn(0);
6063c3a9c75SAmlan Barua }
60754efbe56SHong Zhang 
608b2b77a04SHong Zhang /*
6093c3a9c75SAmlan Barua       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
6103c3a9c75SAmlan Barua   Input A, x, y
6113c3a9c75SAmlan Barua   A - FFTW matrix
61254dd5118SAmlan Barua   x - user data
613b2b77a04SHong Zhang   Options Database Keys:
614b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
615b2b77a04SHong Zhang 
616b2b77a04SHong Zhang    Level: intermediate
617b2b77a04SHong Zhang 
618b2b77a04SHong Zhang */
6193c3a9c75SAmlan Barua 
6203c3a9c75SAmlan Barua EXTERN_C_BEGIN
6213c3a9c75SAmlan Barua #undef __FUNCT__
6223c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW"
6233c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
6243c3a9c75SAmlan Barua {
6253c3a9c75SAmlan Barua   PetscErrorCode ierr;
6263c3a9c75SAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
6273c3a9c75SAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
6283c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6293c3a9c75SAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
6303c3a9c75SAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
631f76f76beSAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
632*e5d7f247SAmlan Barua   PetscInt       i,j,k,rank,size,partial_dim;
6333c3a9c75SAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
634*e5d7f247SAmlan Barua   ptrdiff_t      local_n1,local_1_start,temp;
6353c3a9c75SAmlan Barua   VecScatter     vecscat;
6363c3a9c75SAmlan Barua   IS             list1,list2;
6373c3a9c75SAmlan Barua 
638f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
639f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6403c3a9c75SAmlan Barua   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
641f76f76beSAmlan Barua   printf("Local ownership starts at %d\n",low);
6423c3a9c75SAmlan Barua 
6433c3a9c75SAmlan Barua  switch (ndim){
6443c3a9c75SAmlan Barua  case 1:
645*e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
6463c3a9c75SAmlan Barua   break;
6473c3a9c75SAmlan Barua  case 2:
6483c3a9c75SAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6493c3a9c75SAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
6503c3a9c75SAmlan Barua 
6515b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
6525b3e41ffSAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
6535b3e41ffSAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
6543c3a9c75SAmlan Barua 
6553c3a9c75SAmlan Barua       if (dim[1]%2==0)
6563c3a9c75SAmlan Barua         NM = dim[1]+2;
6573c3a9c75SAmlan Barua       else
6583c3a9c75SAmlan Barua         NM = dim[1]+1;
6593c3a9c75SAmlan Barua 
6603c3a9c75SAmlan Barua       for (i=0;i<local_n0;i++){
6613c3a9c75SAmlan Barua          for (j=0;j<dim[1];j++){
6623c3a9c75SAmlan Barua             tempindx = i*dim[1] + j;
6633c3a9c75SAmlan Barua             tempindx1 = i*NM + j;
6645b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
6653c3a9c75SAmlan Barua             indx2[tempindx]=low+tempindx1;
6665b3e41ffSAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
6675b3e41ffSAmlan Barua   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
6685b3e41ffSAmlan Barua   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
6695b3e41ffSAmlan Barua   //          printf("-------------------------\n",indx2[tempindx],rank);
6703c3a9c75SAmlan Barua         }
6713c3a9c75SAmlan Barua      }
6723c3a9c75SAmlan Barua 
6733c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
6743c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
6753c3a9c75SAmlan Barua 
676f76f76beSAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
677f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
678f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
679f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
6803c3a9c75SAmlan Barua       break;
6813c3a9c75SAmlan Barua 
6823c3a9c75SAmlan Barua  case 3:
68351d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
68451d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
68551d1eed7SAmlan Barua 
68651d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
68751d1eed7SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
68851d1eed7SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
68951d1eed7SAmlan Barua 
69051d1eed7SAmlan Barua       if (dim[2]%2==0)
69151d1eed7SAmlan Barua         NM = dim[2]+2;
69251d1eed7SAmlan Barua       else
69351d1eed7SAmlan Barua         NM = dim[2]+1;
69451d1eed7SAmlan Barua 
69551d1eed7SAmlan Barua       for (i=0;i<local_n0;i++){
69651d1eed7SAmlan Barua          for (j=0;j<dim[1];j++){
69751d1eed7SAmlan Barua             for (k=0;k<dim[2];k++){
69851d1eed7SAmlan Barua                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
69951d1eed7SAmlan Barua                tempindx1 = i*dim[1]*NM + j*NM + k;
70051d1eed7SAmlan Barua                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
70151d1eed7SAmlan Barua                indx2[tempindx]=low+tempindx1;
70251d1eed7SAmlan Barua             }
70351d1eed7SAmlan Barua            // printf("Val tempindx1 = %d\n",tempindx1);
70451d1eed7SAmlan Barua            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
70551d1eed7SAmlan Barua            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
70651d1eed7SAmlan Barua            // printf("-------------------------\n",indx2[tempindx],rank);
70751d1eed7SAmlan Barua         }
70851d1eed7SAmlan Barua      }
70951d1eed7SAmlan Barua 
71051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
71151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
71251d1eed7SAmlan Barua 
71351d1eed7SAmlan Barua       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
71451d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
71551d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
71651d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
7173c3a9c75SAmlan Barua       break;
7183c3a9c75SAmlan Barua 
7193c3a9c75SAmlan Barua  default:
720*e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
721*e5d7f247SAmlan Barua       printf("The value of temp is %ld\n",temp);
722*e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
723*e5d7f247SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
724*e5d7f247SAmlan Barua       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
725*e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
726*e5d7f247SAmlan Barua 
727*e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
728*e5d7f247SAmlan Barua       printf("The value of partial dim is %d\n",partial_dim);
729*e5d7f247SAmlan Barua 
730*e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
731*e5d7f247SAmlan Barua       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
732*e5d7f247SAmlan Barua       printf("Val local_0_start = %ld\n",local_0_start);
733*e5d7f247SAmlan Barua 
734*e5d7f247SAmlan Barua       if (dim[ndim-1]%2==0)
735*e5d7f247SAmlan Barua         NM = dim[ndim-1]+2;
736*e5d7f247SAmlan Barua       else
737*e5d7f247SAmlan Barua         NM = dim[ndim-1]+1;
738*e5d7f247SAmlan Barua 
739*e5d7f247SAmlan Barua 
740*e5d7f247SAmlan Barua 
7413c3a9c75SAmlan Barua   break;
7423c3a9c75SAmlan Barua  }
7433c3a9c75SAmlan Barua 
7443c3a9c75SAmlan Barua  return 0;
7453c3a9c75SAmlan Barua }
7463c3a9c75SAmlan Barua EXTERN_C_END
7473c3a9c75SAmlan Barua 
7483c3a9c75SAmlan Barua #undef __FUNCT__
7493c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT"
7503c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
7513c3a9c75SAmlan Barua {
7523c3a9c75SAmlan Barua   PetscErrorCode ierr;
7533c3a9c75SAmlan Barua   PetscFunctionBegin;
7543c3a9c75SAmlan Barua   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7553c3a9c75SAmlan Barua   PetscFunctionReturn(0);
7563c3a9c75SAmlan Barua }
75754efbe56SHong Zhang 
7585b3e41ffSAmlan Barua /*
7595b3e41ffSAmlan Barua       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
7605b3e41ffSAmlan Barua   Input A, x, y
7615b3e41ffSAmlan Barua   A - FFTW matrix
7625b3e41ffSAmlan Barua   x - FFTW vector
7635b3e41ffSAmlan Barua   y - PETSc vector that user can read
7645b3e41ffSAmlan Barua   Options Database Keys:
7655b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
7665b3e41ffSAmlan Barua 
7675b3e41ffSAmlan Barua    Level: intermediate
7685b3e41ffSAmlan Barua 
7693c3a9c75SAmlan Barua */
7703c3a9c75SAmlan Barua 
7713c3a9c75SAmlan Barua EXTERN_C_BEGIN
7723c3a9c75SAmlan Barua #undef __FUNCT__
7735b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW"
7745b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
7755b3e41ffSAmlan Barua {
7765b3e41ffSAmlan Barua   PetscErrorCode ierr;
7775b3e41ffSAmlan Barua   MPI_Comm       comm=((PetscObject)A)->comm;
7785b3e41ffSAmlan Barua   Mat_FFT        *fft = (Mat_FFT*)A->data;
7795b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7805b3e41ffSAmlan Barua   PetscInt       N=fft->N, N1, n1 ,NM;
7815b3e41ffSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
7825b3e41ffSAmlan Barua   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
78351d1eed7SAmlan Barua   PetscInt       i,j,k,rank,size;
7845b3e41ffSAmlan Barua   ptrdiff_t      alloc_local,local_n0,local_0_start;
7855b3e41ffSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7865b3e41ffSAmlan Barua   VecScatter     vecscat;
7875b3e41ffSAmlan Barua   IS             list1,list2;
7885b3e41ffSAmlan Barua 
7895b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
7905b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
791cfe1ae98SAmlan Barua   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
7925b3e41ffSAmlan Barua   printf("Local ownership starts at %d\n",low);
7935b3e41ffSAmlan Barua 
7945b3e41ffSAmlan Barua  switch (ndim){
7955b3e41ffSAmlan Barua  case 1:
796*e5d7f247SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
7975b3e41ffSAmlan Barua   break;
7985b3e41ffSAmlan Barua  case 2:
7995b3e41ffSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
8005b3e41ffSAmlan Barua       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
8015b3e41ffSAmlan Barua 
8025b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
8035b3e41ffSAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
8045b3e41ffSAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
8055b3e41ffSAmlan Barua 
8065b3e41ffSAmlan Barua      if (dim[1]%2==0)
8075b3e41ffSAmlan Barua       NM = dim[1]+2;
8085b3e41ffSAmlan Barua     else
8095b3e41ffSAmlan Barua       NM = dim[1]+1;
8105b3e41ffSAmlan Barua 
8115b3e41ffSAmlan Barua 
8125b3e41ffSAmlan Barua 
8135b3e41ffSAmlan Barua      for (i=0;i<local_n0;i++){
8145b3e41ffSAmlan Barua         for (j=0;j<dim[1];j++){
8155b3e41ffSAmlan Barua             tempindx = i*dim[1] + j;
8165b3e41ffSAmlan Barua             tempindx1 = i*NM + j;
8175b3e41ffSAmlan Barua             indx1[tempindx]=local_0_start*dim[1]+tempindx;
8185b3e41ffSAmlan Barua             indx2[tempindx]=low+tempindx1;
819cfe1ae98SAmlan Barua        //     printf("Val tempindx1 = %d\n",tempindx1);
820cfe1ae98SAmlan Barua        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
821cfe1ae98SAmlan Barua        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
822cfe1ae98SAmlan Barua        //     printf("-------------------------\n",indx2[tempindx],rank);
8235b3e41ffSAmlan Barua         }
8245b3e41ffSAmlan Barua      }
8255b3e41ffSAmlan Barua 
8265b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8275b3e41ffSAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8285b3e41ffSAmlan Barua 
8295b3e41ffSAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
8305b3e41ffSAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8315b3e41ffSAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8325b3e41ffSAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
8335b3e41ffSAmlan Barua   break;
8345b3e41ffSAmlan Barua 
8355b3e41ffSAmlan Barua  case 3:
83651d1eed7SAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
83751d1eed7SAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
83851d1eed7SAmlan Barua 
83951d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
84051d1eed7SAmlan Barua      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
84151d1eed7SAmlan Barua      printf("Val local_0_start = %ld\n",local_0_start);
84251d1eed7SAmlan Barua 
84351d1eed7SAmlan Barua      if (dim[2]%2==0)
84451d1eed7SAmlan Barua       NM = dim[2]+2;
84551d1eed7SAmlan Barua     else
84651d1eed7SAmlan Barua       NM = dim[2]+1;
84751d1eed7SAmlan Barua 
84851d1eed7SAmlan Barua      for (i=0;i<local_n0;i++){
84951d1eed7SAmlan Barua         for (j=0;j<dim[1];j++){
85051d1eed7SAmlan Barua            for (k=0;k<dim[2];k++){
85151d1eed7SAmlan Barua               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
85251d1eed7SAmlan Barua               tempindx1 = i*dim[1]*NM + j*NM + k;
85351d1eed7SAmlan Barua               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
85451d1eed7SAmlan Barua               indx2[tempindx]=low+tempindx1;
85551d1eed7SAmlan Barua            }
85651d1eed7SAmlan Barua         //    printf("Val tempindx1 = %d\n",tempindx1);
85751d1eed7SAmlan Barua         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
85851d1eed7SAmlan Barua         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
85951d1eed7SAmlan Barua         //    printf("-------------------------\n",indx2[tempindx],rank);
86051d1eed7SAmlan Barua         }
86151d1eed7SAmlan Barua      }
86251d1eed7SAmlan Barua 
86351d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
86451d1eed7SAmlan Barua      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
86551d1eed7SAmlan Barua 
86651d1eed7SAmlan Barua      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
86751d1eed7SAmlan Barua      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
86851d1eed7SAmlan Barua      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
86951d1eed7SAmlan Barua      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
8705b3e41ffSAmlan Barua   break;
8715b3e41ffSAmlan Barua 
8725b3e41ffSAmlan Barua  default:
8735b3e41ffSAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
8745b3e41ffSAmlan Barua   break;
8755b3e41ffSAmlan Barua  }
8765b3e41ffSAmlan Barua  return 0;
8775b3e41ffSAmlan Barua }
8785b3e41ffSAmlan Barua EXTERN_C_END
8795b3e41ffSAmlan Barua 
8805b3e41ffSAmlan Barua EXTERN_C_BEGIN
8815b3e41ffSAmlan Barua #undef __FUNCT__
8823c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW"
8833c3a9c75SAmlan Barua /*
8843c3a9c75SAmlan Barua       MatCreate_FFTW - Creates a matrix object that provides FFT
8853c3a9c75SAmlan Barua   via the external package FFTW
8863c3a9c75SAmlan Barua   Options Database Keys:
8873c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
8883c3a9c75SAmlan Barua 
8893c3a9c75SAmlan Barua    Level: intermediate
8903c3a9c75SAmlan Barua 
8913c3a9c75SAmlan Barua */
8923c3a9c75SAmlan Barua 
893b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
894b2b77a04SHong Zhang {
895b2b77a04SHong Zhang   PetscErrorCode ierr;
896b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
897b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
898b2b77a04SHong Zhang   Mat_FFTW       *fftw;
899b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
900b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
901b2b77a04SHong Zhang   PetscBool      flg;
902b3a17365SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr,N1;
90311902ff2SHong Zhang   PetscMPIInt    size,rank;
904b3a17365SAmlan Barua   ptrdiff_t      *pdim, temp;
9055b3e41ffSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
906b2b77a04SHong Zhang 
907b2b77a04SHong Zhang   PetscFunctionBegin;
9083c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
9093c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
9103c3a9c75SAmlan Barua //#endif
911b2b77a04SHong Zhang 
912b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
91311902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
91454dd5118SAmlan Barua   ierr = MPI_Barrier(PETSC_COMM_WORLD);
915c92418ccSAmlan Barua 
91611902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
91711902ff2SHong Zhang   pdim[0] = dim[0];
91811902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
91911902ff2SHong Zhang       {
9206882372aSHong Zhang           partial_dim *= dim[ctr];
92111902ff2SHong Zhang           pdim[ctr] = dim[ctr];
9226882372aSHong Zhang       }
9233c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX)
9243c3a9c75SAmlan Barua //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
9253c3a9c75SAmlan Barua //#endif
9263c3a9c75SAmlan Barua 
92711902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
928b2b77a04SHong Zhang   if (size == 1) {
929b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
930b2b77a04SHong Zhang     n = N;
931b2b77a04SHong Zhang   } else {
9326882372aSHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
933b2b77a04SHong Zhang     switch (ndim){
934b2b77a04SHong Zhang     case 1:
9353c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9363c3a9c75SAmlan Barua   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
937*e5d7f247SAmlan Barua #else
9386882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
9396882372aSHong Zhang       n = (PetscInt)local_n0;
9406882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
941*e5d7f247SAmlan Barua #endif
9423c3a9c75SAmlan Barua //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
943b2b77a04SHong Zhang       break;
944b2b77a04SHong Zhang     case 2:
9455b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
946b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
947b2b77a04SHong Zhang       /*
948b2b77a04SHong Zhang        PetscMPIInt    rank;
949b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
950b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
951b2b77a04SHong Zhang        */
952b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
953b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
9545b3e41ffSAmlan Barua #else
9555b3e41ffSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
9565b3e41ffSAmlan Barua       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
9575b3e41ffSAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
9585b3e41ffSAmlan Barua #endif
959b2b77a04SHong Zhang       break;
960b2b77a04SHong Zhang     case 3:
96111902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
96251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
96351d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
9646882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
9656882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
96651d1eed7SAmlan Barua #else
96751d1eed7SAmlan Barua       printf("The code comes here\n");
96851d1eed7SAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
96951d1eed7SAmlan Barua       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
97051d1eed7SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
97151d1eed7SAmlan Barua #endif
972b2b77a04SHong Zhang       break;
973b2b77a04SHong Zhang     default:
974b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
97511902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
976c92418ccSAmlan Barua //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
97711902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
9786882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
97911902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
9806882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
981b3a17365SAmlan Barua #else
982b3a17365SAmlan Barua       temp = pdim[ndim-1];
983b3a17365SAmlan Barua       pdim[ndim-1]= temp/2 + 1;
984b3a17365SAmlan Barua       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
985b3a17365SAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
986b3a17365SAmlan Barua       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
987b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
988b3a17365SAmlan Barua       pdim[ndim-1] = temp;
989b3a17365SAmlan Barua       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
990b3a17365SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
991b3a17365SAmlan Barua #endif
992b2b77a04SHong Zhang       break;
993b2b77a04SHong Zhang     }
994b2b77a04SHong Zhang   }
995b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
996b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
997b2b77a04SHong Zhang   fft->data = (void*)fftw;
998b2b77a04SHong Zhang 
999b2b77a04SHong Zhang   fft->n           = n;
1000c92418ccSAmlan Barua   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1001*e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
1002c92418ccSAmlan Barua   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1003b1301b2eSHong Zhang   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1004c92418ccSAmlan Barua 
1005b2b77a04SHong Zhang   fftw->p_forward  = 0;
1006b2b77a04SHong Zhang   fftw->p_backward = 0;
1007b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1008b2b77a04SHong Zhang 
1009b2b77a04SHong Zhang   if (size == 1){
1010b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1011b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1012b2b77a04SHong Zhang   } else {
1013b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1014b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1015b2b77a04SHong Zhang   }
1016b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
1017b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
1018b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
10193c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
10203c3a9c75SAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
10215b3e41ffSAmlan Barua   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
10223c3a9c75SAmlan Barua #endif
1023b2b77a04SHong Zhang 
1024b2b77a04SHong Zhang   /* get runtime options */
1025b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1026b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1027b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1028b2b77a04SHong Zhang   PetscOptionsEnd();
1029b2b77a04SHong Zhang   PetscFunctionReturn(0);
1030b2b77a04SHong Zhang }
1031b2b77a04SHong Zhang EXTERN_C_END
10323c3a9c75SAmlan Barua 
10333c3a9c75SAmlan Barua 
10343c3a9c75SAmlan Barua 
10353c3a9c75SAmlan Barua 
1036