xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 11902ff299104c2db9cea8273c74c98f6bf4ede7)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b2b77a04SHong Zhang   fftw_plan   p_forward,p_backward;
14b2b77a04SHong Zhang   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
15b2b77a04SHong Zhang   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
16b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
17b2b77a04SHong Zhang } Mat_FFTW;
18b2b77a04SHong Zhang 
19b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
20b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
21b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
23b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
24b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
25b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
26b2b77a04SHong Zhang 
27b2b77a04SHong Zhang #undef __FUNCT__
28b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW"
29b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
30b2b77a04SHong Zhang {
31b2b77a04SHong Zhang   PetscErrorCode ierr;
32b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
33b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
34b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
35b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
36b2b77a04SHong Zhang 
37b2b77a04SHong Zhang   PetscFunctionBegin;
38b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
39b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
40b2b77a04SHong Zhang #endif
41b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
44b2b77a04SHong Zhang     switch (ndim){
45b2b77a04SHong Zhang     case 1:
46b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47b2b77a04SHong Zhang       break;
48b2b77a04SHong Zhang     case 2:
49b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
50b2b77a04SHong Zhang       break;
51b2b77a04SHong Zhang     case 3:
52b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
53b2b77a04SHong Zhang       break;
54b2b77a04SHong Zhang     default:
55b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56b2b77a04SHong Zhang       break;
57b2b77a04SHong Zhang     }
58b2b77a04SHong Zhang     fftw->finarray  = x_array;
59b2b77a04SHong Zhang     fftw->foutarray = y_array;
60b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
61b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
62b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
63b2b77a04SHong Zhang   } else { /* use existing plan */
64b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
65b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
66b2b77a04SHong Zhang     } else {
67b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
68b2b77a04SHong Zhang     }
69b2b77a04SHong Zhang   }
70b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
71b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
72b2b77a04SHong Zhang   PetscFunctionReturn(0);
73b2b77a04SHong Zhang }
74b2b77a04SHong Zhang 
75b2b77a04SHong Zhang #undef __FUNCT__
76b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW"
77b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
78b2b77a04SHong Zhang {
79b2b77a04SHong Zhang   PetscErrorCode ierr;
80b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
81b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
82b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
83b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
84b2b77a04SHong Zhang 
85b2b77a04SHong Zhang   PetscFunctionBegin;
86b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
87b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
88b2b77a04SHong Zhang #endif
89b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
90b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
91b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
92b2b77a04SHong Zhang     switch (ndim){
93b2b77a04SHong Zhang     case 1:
94b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
95b2b77a04SHong Zhang       break;
96b2b77a04SHong Zhang     case 2:
97b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
98b2b77a04SHong Zhang       break;
99b2b77a04SHong Zhang     case 3:
100b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
101b2b77a04SHong Zhang       break;
102b2b77a04SHong Zhang     default:
103b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
104b2b77a04SHong Zhang       break;
105b2b77a04SHong Zhang     }
106b2b77a04SHong Zhang     fftw->binarray  = x_array;
107b2b77a04SHong Zhang     fftw->boutarray = y_array;
108b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
109b2b77a04SHong Zhang   } else { /* use existing plan */
110b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
111b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
112b2b77a04SHong Zhang     } else {
113b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
114b2b77a04SHong Zhang     }
115b2b77a04SHong Zhang   }
116b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
117b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
118b2b77a04SHong Zhang   PetscFunctionReturn(0);
119b2b77a04SHong Zhang }
120b2b77a04SHong Zhang 
121b2b77a04SHong Zhang #undef __FUNCT__
122b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW"
123b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
124b2b77a04SHong Zhang {
125b2b77a04SHong Zhang   PetscErrorCode ierr;
126b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
127b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
128b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
129*11902ff2SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
130b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
131*11902ff2SHong Zhang   ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
132b2b77a04SHong Zhang 
133b2b77a04SHong Zhang   PetscFunctionBegin;
134b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
135b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
136b2b77a04SHong Zhang #endif
137*11902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
138*11902ff2SHong Zhang   for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
139*11902ff2SHong Zhang 
140b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
141b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
142b2b77a04SHong Zhang   if (!fftw->p_forward){ /* create a plan, then excute it */
143b2b77a04SHong Zhang     switch (ndim){
144b2b77a04SHong Zhang     case 1:
145b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
146b2b77a04SHong Zhang       break;
147b2b77a04SHong Zhang     case 2:
148b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
149b2b77a04SHong Zhang       break;
150b2b77a04SHong Zhang     case 3:
151b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
152b2b77a04SHong Zhang       break;
153b2b77a04SHong Zhang     default:
154*11902ff2SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
155*11902ff2SHong Zhang  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
156b2b77a04SHong Zhang       break;
157b2b77a04SHong Zhang     }
158b2b77a04SHong Zhang     fftw->finarray  = x_array;
159b2b77a04SHong Zhang     fftw->foutarray = y_array;
160b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
161b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
162b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
163b2b77a04SHong Zhang   } else { /* use existing plan */
164b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
165b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
166b2b77a04SHong Zhang     } else {
167b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
168b2b77a04SHong Zhang     }
169b2b77a04SHong Zhang   }
170b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
171b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
172b2b77a04SHong Zhang   PetscFunctionReturn(0);
173b2b77a04SHong Zhang }
174b2b77a04SHong Zhang 
175b2b77a04SHong Zhang #undef __FUNCT__
176b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW"
177b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
178b2b77a04SHong Zhang {
179b2b77a04SHong Zhang   PetscErrorCode ierr;
180b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
181b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
182b2b77a04SHong Zhang   PetscScalar    *x_array,*y_array;
183*11902ff2SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
184b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
185*11902ff2SHong Zhang   ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
186b2b77a04SHong Zhang 
187b2b77a04SHong Zhang   PetscFunctionBegin;
188b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
189b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
190b2b77a04SHong Zhang #endif
191*11902ff2SHong Zhang   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); // should pdim be a member of Mat_FFTW?
192*11902ff2SHong Zhang   for(ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
193*11902ff2SHong Zhang 
194b2b77a04SHong Zhang   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
195b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
196b2b77a04SHong Zhang   if (!fftw->p_backward){ /* create a plan, then excute it */
197b2b77a04SHong Zhang     switch (ndim){
198b2b77a04SHong Zhang     case 1:
199b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
200b2b77a04SHong Zhang       break;
201b2b77a04SHong Zhang     case 2:
202b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
203b2b77a04SHong Zhang       break;
204b2b77a04SHong Zhang     case 3:
205b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
206b2b77a04SHong Zhang       break;
207b2b77a04SHong Zhang     default:
208*11902ff2SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
209b2b77a04SHong Zhang       break;
210b2b77a04SHong Zhang     }
211b2b77a04SHong Zhang     fftw->binarray  = x_array;
212b2b77a04SHong Zhang     fftw->boutarray = y_array;
213b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
214b2b77a04SHong Zhang   } else { /* use existing plan */
215b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
216b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
217b2b77a04SHong Zhang     } else {
218b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
219b2b77a04SHong Zhang     }
220b2b77a04SHong Zhang   }
221b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
222b2b77a04SHong Zhang   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
223*11902ff2SHong Zhang   ierr = PetscFree(pdim);CHKERRQ(ierr);
224b2b77a04SHong Zhang   PetscFunctionReturn(0);
225b2b77a04SHong Zhang }
226b2b77a04SHong Zhang 
227b2b77a04SHong Zhang #undef __FUNCT__
228b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW"
229b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
230b2b77a04SHong Zhang {
231b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
232b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
233b2b77a04SHong Zhang   PetscErrorCode ierr;
234b2b77a04SHong Zhang 
235b2b77a04SHong Zhang   PetscFunctionBegin;
236b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
237b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
238b2b77a04SHong Zhang #endif
239b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
240b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
241bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
242b2b77a04SHong Zhang   PetscFunctionReturn(0);
243b2b77a04SHong Zhang }
244b2b77a04SHong Zhang 
245c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
246b2b77a04SHong Zhang #undef __FUNCT__
247b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW"
248b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
249b2b77a04SHong Zhang {
250b2b77a04SHong Zhang   PetscErrorCode  ierr;
251b2b77a04SHong Zhang   PetscScalar     *array;
252b2b77a04SHong Zhang 
253b2b77a04SHong Zhang   PetscFunctionBegin;
254b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
255b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
256b2b77a04SHong Zhang #endif
257b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
258b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
259b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
260b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
261b2b77a04SHong Zhang   PetscFunctionReturn(0);
262b2b77a04SHong Zhang }
263b2b77a04SHong Zhang 
264b2b77a04SHong Zhang #undef __FUNCT__
265b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW"
266b2b77a04SHong Zhang /*
267b2b77a04SHong Zhang    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
268b2b77a04SHong Zhang      parallel layout determined by FFTW
269b2b77a04SHong Zhang 
270b2b77a04SHong Zhang    Collective on Mat
271b2b77a04SHong Zhang 
272b2b77a04SHong Zhang    Input Parameter:
273b2b77a04SHong Zhang .  mat - the matrix
274b2b77a04SHong Zhang 
275b2b77a04SHong Zhang    Output Parameter:
276b2b77a04SHong Zhang +   fin - (optional) input vector of forward FFTW
277b2b77a04SHong Zhang -   fout - (optional) output vector of forward FFTW
278b2b77a04SHong Zhang 
279b2b77a04SHong Zhang   Level: advanced
280b2b77a04SHong Zhang 
281b2b77a04SHong Zhang .seealso: MatCreateFFTW()
282b2b77a04SHong Zhang */
283b2b77a04SHong Zhang PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
284b2b77a04SHong Zhang {
285b2b77a04SHong Zhang   PetscErrorCode ierr;
286b2b77a04SHong Zhang   PetscMPIInt    size,rank;
287b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
288b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
289b2b77a04SHong Zhang   PetscInt       N=fft->N;
290b2b77a04SHong Zhang 
291b2b77a04SHong Zhang   PetscFunctionBegin;
292b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
293b2b77a04SHong Zhang   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
294b2b77a04SHong Zhang #endif
295b2b77a04SHong Zhang   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
296b2b77a04SHong Zhang   PetscValidType(A,1);
297b2b77a04SHong Zhang 
298b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
299b2b77a04SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
300b2b77a04SHong Zhang   if (size == 1){ /* sequential case */
301b2b77a04SHong Zhang     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
302b2b77a04SHong Zhang     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
303b2b77a04SHong Zhang   } else {        /* mpi case */
304b2b77a04SHong Zhang     ptrdiff_t      alloc_local,local_n0,local_0_start;
3056882372aSHong Zhang     ptrdiff_t      local_n1,local_1_end;
306*11902ff2SHong Zhang     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n,ctr;
307b2b77a04SHong Zhang     fftw_complex   *data_fin,*data_fout;
308*11902ff2SHong Zhang     ptrdiff_t      ndim1,*pdim;
309*11902ff2SHong Zhang     ndim1=(ptrdiff_t) ndim;
310*11902ff2SHong Zhang     pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
311*11902ff2SHong Zhang 
312*11902ff2SHong Zhang     for(ctr=0;ctr<ndim;ctr++)
313*11902ff2SHong Zhang         {
314*11902ff2SHong Zhang            pdim[ctr] = dim[ctr];
315*11902ff2SHong Zhang        }
316b2b77a04SHong Zhang 
317b2b77a04SHong Zhang     switch (ndim){
318b2b77a04SHong Zhang     case 1:
3196882372aSHong Zhang       /* Get local size */
3206882372aSHong Zhang 
3216882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
3226882372aSHong Zhang       if (fin) {
3236882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3246882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
3256882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
3266882372aSHong Zhang       }
3276882372aSHong Zhang       if (fout) {
3286882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3296882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
3306882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
3316882372aSHong Zhang       }
332b2b77a04SHong Zhang       break;
333b2b77a04SHong Zhang     case 2:
334b2b77a04SHong Zhang       /* Get local size */
335b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
336b2b77a04SHong Zhang       if (fin) {
337b2b77a04SHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
338b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
339b2b77a04SHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
340b2b77a04SHong Zhang       }
341b2b77a04SHong Zhang       if (fout) {
342b2b77a04SHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
343b2b77a04SHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
344b2b77a04SHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
345b2b77a04SHong Zhang       }
346b2b77a04SHong Zhang       break;
347b2b77a04SHong Zhang     case 3:
3486882372aSHong Zhang       /* Get local size */
3496882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
350*11902ff2SHong Zhang //      printf("The quantity n is %d",n);
3516882372aSHong Zhang       if (fin) {
3526882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3536882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
3546882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
3556882372aSHong Zhang       }
3566882372aSHong Zhang       if (fout) {
3576882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3586882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
3596882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
3606882372aSHong Zhang       }
361b2b77a04SHong Zhang       break;
362b2b77a04SHong Zhang     default:
3636882372aSHong Zhang       /* Get local size */
364*11902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim1,pdim,comm,&local_n0,&local_0_start);
365*11902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
366*11902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
367*11902ff2SHong Zhang //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
368*11902ff2SHong Zhang //      for(i=0;i<ndim;i++)
369*11902ff2SHong Zhang //         {
370*11902ff2SHong Zhang //          pdim[i]=dim[i];printf("%d",pdim[i]);
371*11902ff2SHong Zhang //         }
372*11902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
373*11902ff2SHong Zhang //      printf("The quantity n is %d",n);
3746882372aSHong Zhang       if (fin) {
3756882372aSHong Zhang         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3766882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
3776882372aSHong Zhang         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
3786882372aSHong Zhang       }
3796882372aSHong Zhang       if (fout) {
3806882372aSHong Zhang         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
3816882372aSHong Zhang         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
3826882372aSHong Zhang         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
3836882372aSHong Zhang       }
384b2b77a04SHong Zhang       break;
385b2b77a04SHong Zhang     }
386b2b77a04SHong Zhang   }
387fb7c10e0SHong Zhang   if (fin){
388262f75f9SHong Zhang     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
389fb7c10e0SHong Zhang   }
390fb7c10e0SHong Zhang   if (fout){
391262f75f9SHong Zhang     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
392fb7c10e0SHong Zhang   }
393b2b77a04SHong Zhang   PetscFunctionReturn(0);
394b2b77a04SHong Zhang }
395b2b77a04SHong Zhang 
396b2b77a04SHong Zhang EXTERN_C_BEGIN
397b2b77a04SHong Zhang #undef __FUNCT__
398b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW"
399b2b77a04SHong Zhang /*
400b2b77a04SHong Zhang       MatCreate_FFTW - Creates a matrix object that provides FFT
401b2b77a04SHong Zhang   via the external package FFTW
402b2b77a04SHong Zhang 
403b2b77a04SHong Zhang   Options Database Keys:
404b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags
405b2b77a04SHong Zhang 
406b2b77a04SHong Zhang    Level: intermediate
407b2b77a04SHong Zhang 
408b2b77a04SHong Zhang */
409b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A)
410b2b77a04SHong Zhang {
411b2b77a04SHong Zhang   PetscErrorCode ierr;
412b2b77a04SHong Zhang   MPI_Comm       comm=((PetscObject)A)->comm;
413b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
414b2b77a04SHong Zhang   Mat_FFTW       *fftw;
415b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
416b2b77a04SHong Zhang   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
417b2b77a04SHong Zhang   PetscBool      flg;
4186882372aSHong Zhang   PetscInt       p_flag,partial_dim=1,ctr;
419*11902ff2SHong Zhang   PetscMPIInt    size,rank;
420*11902ff2SHong Zhang   ptrdiff_t      *pdim;
421b2b77a04SHong Zhang 
422b2b77a04SHong Zhang   PetscFunctionBegin;
423b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX)
424b2b77a04SHong Zhang   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
425b2b77a04SHong Zhang #endif
426b2b77a04SHong Zhang 
427b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
428*11902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
429*11902ff2SHong Zhang   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
430*11902ff2SHong Zhang   pdim[0] = dim[0];
431*11902ff2SHong Zhang   for(ctr=1;ctr<ndim;ctr++)
432*11902ff2SHong Zhang       {
4336882372aSHong Zhang           partial_dim*=dim[ctr];
434*11902ff2SHong Zhang           pdim[ctr] = dim[ctr];
4356882372aSHong Zhang       }
436*11902ff2SHong Zhang //  printf("partial dimension is %d",partial_dim);
437b2b77a04SHong Zhang   if (size == 1) {
438b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
439b2b77a04SHong Zhang     n = N;
440b2b77a04SHong Zhang   } else {
4416882372aSHong Zhang     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
442b2b77a04SHong Zhang     switch (ndim){
443b2b77a04SHong Zhang     case 1:
4446882372aSHong Zhang       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
4456882372aSHong Zhang       n = (PetscInt)local_n0;
4466882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
4476882372aSHong Zhang 
448b2b77a04SHong Zhang       break;
449b2b77a04SHong Zhang     case 2:
450b2b77a04SHong Zhang       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
451b2b77a04SHong Zhang       /*
452b2b77a04SHong Zhang        PetscMPIInt    rank;
453b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
454b2b77a04SHong Zhang        PetscSynchronizedFlush(comm);
455b2b77a04SHong Zhang        */
456b2b77a04SHong Zhang       n = (PetscInt)local_n0*dim[1];
457b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
458b2b77a04SHong Zhang       break;
459b2b77a04SHong Zhang     case 3:
4606882372aSHong Zhang       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
461*11902ff2SHong Zhang //      printf("The value of alloc local is %d",alloc_local);
4626882372aSHong Zhang       n = (PetscInt)local_n0*dim[1]*dim[2];
4636882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
464b2b77a04SHong Zhang       break;
465b2b77a04SHong Zhang     default:
466*11902ff2SHong Zhang       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
467*11902ff2SHong Zhang //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
468*11902ff2SHong Zhang //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
4696882372aSHong Zhang       n = (PetscInt)local_n0*partial_dim;
470*11902ff2SHong Zhang //      printf("New partial dimension is %d %d %d",n,N,ndim);
4716882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
472b2b77a04SHong Zhang       break;
473b2b77a04SHong Zhang     }
474b2b77a04SHong Zhang   }
475b2b77a04SHong Zhang   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
476b2b77a04SHong Zhang 
477b2b77a04SHong Zhang   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
478b2b77a04SHong Zhang   fft->data = (void*)fftw;
479b2b77a04SHong Zhang 
480b2b77a04SHong Zhang   fft->n           = n;
481b2b77a04SHong Zhang   fftw->p_forward  = 0;
482b2b77a04SHong Zhang   fftw->p_backward = 0;
483b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
484b2b77a04SHong Zhang 
485b2b77a04SHong Zhang   if (size == 1){
486b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
487b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
488b2b77a04SHong Zhang   } else {
489b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
490b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
491b2b77a04SHong Zhang   }
492b2b77a04SHong Zhang   fft->matdestroy          = MatDestroy_FFTW;
493b2b77a04SHong Zhang   A->ops->getvecs       = MatGetVecs_FFTW;
494b2b77a04SHong Zhang   A->assembled          = PETSC_TRUE;
495b2b77a04SHong Zhang 
496b2b77a04SHong Zhang   /* get runtime options */
497b2b77a04SHong Zhang   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
498b2b77a04SHong Zhang     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
499b2b77a04SHong Zhang     if (flg) {fftw->p_flag = (unsigned)p_flag;}
500b2b77a04SHong Zhang   PetscOptionsEnd();
501b2b77a04SHong Zhang   PetscFunctionReturn(0);
502b2b77a04SHong Zhang }
503b2b77a04SHong Zhang EXTERN_C_END
504