xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 9448b7f1ca3c3549e6b999e23e45403553ba4027)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4b2b77a04SHong Zhang     Testing examples can be found in ~src/mat/examples/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt     partial_dim;
20b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32b2b77a04SHong Zhang 
3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3497504071SAmlan Barua    Input parameter:
353564f093SHong Zhang      A - the matrix
3697504071SAmlan Barua      x - the vector on which FDFT will be performed
3797504071SAmlan Barua 
3897504071SAmlan Barua    Output parameter:
3997504071SAmlan Barua      y - vector that stores result of FDFT
4097504071SAmlan Barua */
41b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
42b2b77a04SHong Zhang {
43b2b77a04SHong Zhang   PetscErrorCode ierr;
44b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
45b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
46f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
47f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
481acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
49a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
50a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
51a1b6d50cSHong Zhang #else
528c1d5ab3SHong Zhang   fftw_iodim     *iodims;
53a1b6d50cSHong Zhang #endif
541acd80e4SHong Zhang   PetscInt       i;
551acd80e4SHong Zhang #endif
561acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
57b2b77a04SHong Zhang 
58b2b77a04SHong Zhang   PetscFunctionBegin;
59f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
60b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
61b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
62b2b77a04SHong Zhang     switch (ndim) {
63b2b77a04SHong Zhang     case 1:
6458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
65b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6658a912c5SAmlan Barua #else
6758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6858a912c5SAmlan Barua #endif
69b2b77a04SHong Zhang       break;
70b2b77a04SHong Zhang     case 2:
7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
72b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7358a912c5SAmlan Barua #else
7458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7558a912c5SAmlan Barua #endif
76b2b77a04SHong Zhang       break;
77b2b77a04SHong Zhang     case 3:
7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
79b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8058a912c5SAmlan Barua #else
8158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8258a912c5SAmlan Barua #endif
83b2b77a04SHong Zhang       break;
84b2b77a04SHong Zhang     default:
8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
86a1b6d50cSHong Zhang       iodims = fftw->iodims;
87a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
888c1d5ab3SHong Zhang       if (ndim) {
89a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
908c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
918c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
92a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
938c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
948c1d5ab3SHong Zhang         }
958c1d5ab3SHong Zhang       }
96a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
97a1b6d50cSHong Zhang #else
98a1b6d50cSHong Zhang       if (ndim) {
99a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
100a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
101a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
102a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
103a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
104a1b6d50cSHong Zhang         }
105a1b6d50cSHong Zhang       }
106a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
107a1b6d50cSHong Zhang #endif
108a1b6d50cSHong Zhang 
10958a912c5SAmlan Barua #else
110a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11158a912c5SAmlan Barua #endif
112b2b77a04SHong Zhang       break;
113b2b77a04SHong Zhang     }
114f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
115b2b77a04SHong Zhang     fftw->foutarray = y_array;
116b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
117b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
118b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
119b2b77a04SHong Zhang   } else { /* use existing plan */
120b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1217e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
122b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1237e4bc134SDominic Meiser #else
1247e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1257e4bc134SDominic Meiser #endif
126b2b77a04SHong Zhang     } else {
127b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
128b2b77a04SHong Zhang     }
129b2b77a04SHong Zhang   }
130b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
131f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
132b2b77a04SHong Zhang   PetscFunctionReturn(0);
133b2b77a04SHong Zhang }
134b2b77a04SHong Zhang 
13597504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13697504071SAmlan Barua    Input parameter:
1373564f093SHong Zhang      A - the matrix
13897504071SAmlan Barua      x - the vector on which BDFT will be performed
13997504071SAmlan Barua 
14097504071SAmlan Barua    Output parameter:
14197504071SAmlan Barua      y - vector that stores result of BDFT
14297504071SAmlan Barua */
14397504071SAmlan Barua 
144b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
145b2b77a04SHong Zhang {
146b2b77a04SHong Zhang   PetscErrorCode ierr;
147b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
148b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
149f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
150f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
151b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1521acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
153a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
154a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
155a1b6d50cSHong Zhang #else
156a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
157a1b6d50cSHong Zhang #endif
1581acd80e4SHong Zhang #endif
159b2b77a04SHong Zhang 
160b2b77a04SHong Zhang   PetscFunctionBegin;
161f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
162b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
164b2b77a04SHong Zhang     switch (ndim) {
165b2b77a04SHong Zhang     case 1:
16658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
167b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16858a912c5SAmlan Barua #else
169e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17058a912c5SAmlan Barua #endif
171b2b77a04SHong Zhang       break;
172b2b77a04SHong Zhang     case 2:
17358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
174b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17558a912c5SAmlan Barua #else
176e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17758a912c5SAmlan Barua #endif
178b2b77a04SHong Zhang       break;
179b2b77a04SHong Zhang     case 3:
18058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
181b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18258a912c5SAmlan Barua #else
183e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18458a912c5SAmlan Barua #endif
185b2b77a04SHong Zhang       break;
186b2b77a04SHong Zhang     default:
18758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
188a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
189a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190a1b6d50cSHong Zhang #else
1918c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
192a1b6d50cSHong Zhang #endif
19358a912c5SAmlan Barua #else
194a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19558a912c5SAmlan Barua #endif
196b2b77a04SHong Zhang       break;
197b2b77a04SHong Zhang     }
198f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
199b2b77a04SHong Zhang     fftw->boutarray = y_array;
200b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
201b2b77a04SHong Zhang   } else { /* use existing plan */
202b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2037e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
204b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2057e4bc134SDominic Meiser #else
2067e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2077e4bc134SDominic Meiser #endif
208b2b77a04SHong Zhang     } else {
209b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
210b2b77a04SHong Zhang     }
211b2b77a04SHong Zhang   }
212b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
213f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
214b2b77a04SHong Zhang   PetscFunctionReturn(0);
215b2b77a04SHong Zhang }
216b2b77a04SHong Zhang 
21797504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
21897504071SAmlan Barua    Input parameter:
2193564f093SHong Zhang      A - the matrix
22097504071SAmlan Barua      x - the vector on which FDFT will be performed
22197504071SAmlan Barua 
22297504071SAmlan Barua    Output parameter:
22397504071SAmlan Barua    y   - vector that stores result of FDFT
22497504071SAmlan Barua */
225b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
226b2b77a04SHong Zhang {
227b2b77a04SHong Zhang   PetscErrorCode ierr;
228b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
229b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
230f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
231f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
232c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
233ce94432eSBarry Smith   MPI_Comm       comm;
234b2b77a04SHong Zhang 
235b2b77a04SHong Zhang   PetscFunctionBegin;
236ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
237f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
238b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
240b2b77a04SHong Zhang     switch (ndim) {
241b2b77a04SHong Zhang     case 1:
2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
244ae0a50aaSHong Zhang #else
2454f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2463c3a9c75SAmlan Barua #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     case 2:
24997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2513c3a9c75SAmlan Barua #else
2523c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     case 3:
2563c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
257b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2583c3a9c75SAmlan Barua #else
25951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2603c3a9c75SAmlan Barua #endif
261b2b77a04SHong Zhang       break;
262b2b77a04SHong Zhang     default:
2633c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
264c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2653c3a9c75SAmlan Barua #else
2663c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2673c3a9c75SAmlan Barua #endif
268b2b77a04SHong Zhang       break;
269b2b77a04SHong Zhang     }
270f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
271b2b77a04SHong Zhang     fftw->foutarray = y_array;
272b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
273b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
274b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
275b2b77a04SHong Zhang   } else { /* use existing plan */
276b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
277b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
278b2b77a04SHong Zhang     } else {
279b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
280b2b77a04SHong Zhang     }
281b2b77a04SHong Zhang   }
282b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
283f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
284b2b77a04SHong Zhang   PetscFunctionReturn(0);
285b2b77a04SHong Zhang }
286b2b77a04SHong Zhang 
28797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
28897504071SAmlan Barua    Input parameter:
2893564f093SHong Zhang      A - the matrix
29097504071SAmlan Barua      x - the vector on which BDFT will be performed
29197504071SAmlan Barua 
29297504071SAmlan Barua    Output parameter:
29397504071SAmlan Barua      y - vector that stores result of BDFT
29497504071SAmlan Barua */
295b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
296b2b77a04SHong Zhang {
297b2b77a04SHong Zhang   PetscErrorCode ierr;
298b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
299b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
300f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
301f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
302c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
303ce94432eSBarry Smith   MPI_Comm       comm;
304b2b77a04SHong Zhang 
305b2b77a04SHong Zhang   PetscFunctionBegin;
306ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
307f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
308b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
310b2b77a04SHong Zhang     switch (ndim) {
311b2b77a04SHong Zhang     case 1:
3123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
313b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
314ae0a50aaSHong Zhang #else
3154f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3163c3a9c75SAmlan Barua #endif
317b2b77a04SHong Zhang       break;
318b2b77a04SHong Zhang     case 2:
31997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
320b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3213c3a9c75SAmlan Barua #else
3223c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3233c3a9c75SAmlan Barua #endif
324b2b77a04SHong Zhang       break;
325b2b77a04SHong Zhang     case 3:
3263c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
327b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3283c3a9c75SAmlan Barua #else
3293c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3303c3a9c75SAmlan Barua #endif
331b2b77a04SHong Zhang       break;
332b2b77a04SHong Zhang     default:
3333c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
334c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3353c3a9c75SAmlan Barua #else
3363c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3373c3a9c75SAmlan Barua #endif
338b2b77a04SHong Zhang       break;
339b2b77a04SHong Zhang     }
340f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
341b2b77a04SHong Zhang     fftw->boutarray = y_array;
342b2b77a04SHong Zhang     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
343b2b77a04SHong Zhang   } else { /* use existing plan */
344b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
345b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
346b2b77a04SHong Zhang     } else {
347b2b77a04SHong Zhang       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
348b2b77a04SHong Zhang     }
349b2b77a04SHong Zhang   }
350b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
351f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
352b2b77a04SHong Zhang   PetscFunctionReturn(0);
353b2b77a04SHong Zhang }
354b2b77a04SHong Zhang 
355b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
356b2b77a04SHong Zhang {
357b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
358b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
359b2b77a04SHong Zhang   PetscErrorCode ierr;
360b2b77a04SHong Zhang 
361b2b77a04SHong Zhang   PetscFunctionBegin;
362b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
363b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3648c1d5ab3SHong Zhang   if (fftw->iodims) {
3658c1d5ab3SHong Zhang     free(fftw->iodims);
3668c1d5ab3SHong Zhang   }
367bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
368bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3696ccf0b3eSHong Zhang   fftw_mpi_cleanup();
370b2b77a04SHong Zhang   PetscFunctionReturn(0);
371b2b77a04SHong Zhang }
372b2b77a04SHong Zhang 
373c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
374b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375b2b77a04SHong Zhang {
376b2b77a04SHong Zhang   PetscErrorCode ierr;
377b2b77a04SHong Zhang   PetscScalar    *array;
378b2b77a04SHong Zhang 
379b2b77a04SHong Zhang   PetscFunctionBegin;
380b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381b2b77a04SHong Zhang   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384b2b77a04SHong Zhang   PetscFunctionReturn(0);
385b2b77a04SHong Zhang }
386b2b77a04SHong Zhang 
3874be45526SHong Zhang /*@
3882a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
3894f7415efSAmlan Barua      parallel layout determined by FFTW
3904f7415efSAmlan Barua 
3914f7415efSAmlan Barua    Collective on Mat
3924f7415efSAmlan Barua 
3934f7415efSAmlan Barua    Input Parameter:
3943564f093SHong Zhang .   A - the matrix
3954f7415efSAmlan Barua 
3964f7415efSAmlan Barua    Output Parameter:
397cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
398cc55f3b1SHong Zhang -   y - (optional) output vector of forward FFTW
399cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4004f7415efSAmlan Barua 
4014f7415efSAmlan Barua   Level: advanced
4023564f093SHong Zhang 
40397504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
40497504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
40597504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
40697504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
40797504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
40897504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
409e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
410e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
411e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
412e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
413e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
414e0ec536eSAmlan Barua         each processor and returns that.
4154f7415efSAmlan Barua 
4164f7415efSAmlan Barua .seealso: MatCreateFFTW()
4174be45526SHong Zhang @*/
4182a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4194be45526SHong Zhang {
4204be45526SHong Zhang   PetscErrorCode ierr;
421b4c3921fSHong Zhang 
4224be45526SHong Zhang   PetscFunctionBegin;
423163d334eSBarry Smith   ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4244be45526SHong Zhang   PetscFunctionReturn(0);
4254be45526SHong Zhang }
4264be45526SHong Zhang 
4272a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4284f7415efSAmlan Barua {
4294f7415efSAmlan Barua   PetscErrorCode ierr;
4304f7415efSAmlan Barua   PetscMPIInt    size,rank;
431ce94432eSBarry Smith   MPI_Comm       comm;
4324f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4334f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4349496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4354f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4364f7415efSAmlan Barua 
4374f7415efSAmlan Barua   PetscFunctionBegin;
4384f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4394f7415efSAmlan Barua   PetscValidType(A,1);
440ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4414f7415efSAmlan Barua 
4424f8276c3SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
4434f8276c3SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
4444f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4454f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4464f7415efSAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4474f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4484f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4494f7415efSAmlan Barua #else
4508ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4518ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4528ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4534f7415efSAmlan Barua #endif
45497504071SAmlan Barua   } else { /* parallel cases */
4554f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4569496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4579496c9aeSAmlan Barua     fftw_complex *data_fout;
4589496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4599496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4609496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4619496c9aeSAmlan Barua #else
4624f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4636ccf0b3eSHong Zhang     PetscInt  n1,N1;
4649496c9aeSAmlan Barua     ptrdiff_t temp;
4659496c9aeSAmlan Barua #endif
4669496c9aeSAmlan Barua 
4674f7415efSAmlan Barua     switch (ndim) {
4684f7415efSAmlan Barua     case 1:
46957625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
4704f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
47157625b84SAmlan Barua #else
47257625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
47357625b84SAmlan Barua       if (fin) {
47457625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
475778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
47626fbe8dcSKarl Rupp 
47757625b84SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
47857625b84SAmlan Barua       }
47957625b84SAmlan Barua       if (fout) {
48057625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
48226fbe8dcSKarl Rupp 
48357625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
48457625b84SAmlan Barua       }
48557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
48657625b84SAmlan Barua       if (bout) {
48757625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
488778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
48926fbe8dcSKarl Rupp 
49057625b84SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
49157625b84SAmlan Barua       }
49257625b84SAmlan Barua       break;
49357625b84SAmlan Barua #endif
4944f7415efSAmlan Barua     case 2:
49597504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
4964f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
4974f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
4984f7415efSAmlan Barua       if (fin) {
4994f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
500778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
50126fbe8dcSKarl Rupp 
5024f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5034f7415efSAmlan Barua       }
5044f7415efSAmlan Barua       if (bout) {
5054f7415efSAmlan Barua         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
506778a2246SBarry Smith         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
50726fbe8dcSKarl Rupp 
5084f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5094f7415efSAmlan Barua       }
51057625b84SAmlan Barua       if (fout) {
51157625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
512778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
51326fbe8dcSKarl Rupp 
51457625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
51557625b84SAmlan Barua       }
5164f7415efSAmlan Barua #else
5174f7415efSAmlan Barua       /* Get local size */
5184f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5194f7415efSAmlan Barua       if (fin) {
5204f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
52226fbe8dcSKarl Rupp 
5234f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5244f7415efSAmlan Barua       }
5254f7415efSAmlan Barua       if (fout) {
5264f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
527778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
52826fbe8dcSKarl Rupp 
5294f7415efSAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5304f7415efSAmlan Barua       }
5314f7415efSAmlan Barua       if (bout) {
5324f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
53426fbe8dcSKarl Rupp 
5354f7415efSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5364f7415efSAmlan Barua       }
5374f7415efSAmlan Barua #endif
5384f7415efSAmlan Barua       break;
5394f7415efSAmlan Barua     case 3:
5404f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5414f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5424f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5434f7415efSAmlan Barua       if (fin) {
5444f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
545778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
54626fbe8dcSKarl Rupp 
5474f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5484f7415efSAmlan Barua       }
5494f7415efSAmlan Barua       if (bout) {
5504f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
551778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
55226fbe8dcSKarl Rupp 
5534f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5544f7415efSAmlan Barua       }
5553564f093SHong Zhang 
55657625b84SAmlan Barua       if (fout) {
55757625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
558778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
55926fbe8dcSKarl Rupp 
56057625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56157625b84SAmlan Barua       }
5624f7415efSAmlan Barua #else
5630c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
5640c9b18e4SAmlan Barua       if (fin) {
5650c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
566778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
56726fbe8dcSKarl Rupp 
5680c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5690c9b18e4SAmlan Barua       }
5700c9b18e4SAmlan Barua       if (fout) {
5710c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
572778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
57326fbe8dcSKarl Rupp 
5740c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5750c9b18e4SAmlan Barua       }
5760c9b18e4SAmlan Barua       if (bout) {
5770c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
578778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
57926fbe8dcSKarl Rupp 
5800c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5810c9b18e4SAmlan Barua       }
5824f7415efSAmlan Barua #endif
5834f7415efSAmlan Barua       break;
5844f7415efSAmlan Barua     default:
5854f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5864f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
58726fbe8dcSKarl Rupp 
5884f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
58926fbe8dcSKarl Rupp 
5904f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5914f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
59226fbe8dcSKarl Rupp 
5934f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
5944f7415efSAmlan Barua 
5954f7415efSAmlan Barua       if (fin) {
5964f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
597778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
59826fbe8dcSKarl Rupp 
5994f7415efSAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6004f7415efSAmlan Barua       }
6014f7415efSAmlan Barua       if (bout) {
6024f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
603778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
60426fbe8dcSKarl Rupp 
6059496c9aeSAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6064f7415efSAmlan Barua       }
6073564f093SHong Zhang 
60857625b84SAmlan Barua       if (fout) {
60957625b84SAmlan Barua         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
610778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
61126fbe8dcSKarl Rupp 
61257625b84SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
61357625b84SAmlan Barua       }
6144f7415efSAmlan Barua #else
6150c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6160c9b18e4SAmlan Barua       if (fin) {
6170c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
618778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
61926fbe8dcSKarl Rupp 
6200c9b18e4SAmlan Barua         (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6210c9b18e4SAmlan Barua       }
6220c9b18e4SAmlan Barua       if (fout) {
6230c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
624778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
62526fbe8dcSKarl Rupp 
6260c9b18e4SAmlan Barua         (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6270c9b18e4SAmlan Barua       }
6280c9b18e4SAmlan Barua       if (bout) {
6290c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
630778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
63126fbe8dcSKarl Rupp 
6320c9b18e4SAmlan Barua         (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6330c9b18e4SAmlan Barua       }
6344f7415efSAmlan Barua #endif
6354f7415efSAmlan Barua       break;
6364f7415efSAmlan Barua     }
6374f7415efSAmlan Barua   }
6384f7415efSAmlan Barua   PetscFunctionReturn(0);
6394f7415efSAmlan Barua }
6404f7415efSAmlan Barua 
6413564f093SHong Zhang /*@
6423564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
64354efbe56SHong Zhang 
6443564f093SHong Zhang    Collective on Mat
6453564f093SHong Zhang 
6463564f093SHong Zhang    Input Parameters:
6473564f093SHong Zhang +  A - FFTW matrix
6483564f093SHong Zhang -  x - the PETSc vector
6493564f093SHong Zhang 
6503564f093SHong Zhang    Output Parameters:
6513564f093SHong Zhang .  y - the FFTW vector
6523564f093SHong Zhang 
653b2b77a04SHong Zhang   Options Database Keys:
6543564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
655b2b77a04SHong Zhang 
656b2b77a04SHong Zhang    Level: intermediate
6573564f093SHong Zhang 
65897504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
65997504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
66097504071SAmlan Barua          zeros. This routine does that job by scattering operation.
66197504071SAmlan Barua 
6623564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
6633564f093SHong Zhang @*/
6643564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
6653564f093SHong Zhang {
6663564f093SHong Zhang   PetscErrorCode ierr;
667b2b77a04SHong Zhang 
6683564f093SHong Zhang   PetscFunctionBegin;
669163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
6703564f093SHong Zhang   PetscFunctionReturn(0);
6713564f093SHong Zhang }
6723c3a9c75SAmlan Barua 
67374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
6743c3a9c75SAmlan Barua {
6753c3a9c75SAmlan Barua   PetscErrorCode ierr;
676ce94432eSBarry Smith   MPI_Comm       comm;
6773c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
6783c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
6799496c9aeSAmlan Barua   PetscInt       N     =fft->N;
680b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
6819496c9aeSAmlan Barua   PetscInt       low;
6827a21806cSHong Zhang   PetscMPIInt    rank,size;
6837a21806cSHong Zhang   PetscInt       vsize,vsize1;
6847a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
6859496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
6863c3a9c75SAmlan Barua   VecScatter     vecscat;
6873c3a9c75SAmlan Barua   IS             list1,list2;
6889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6899496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
6909496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
691a31b9140SHong Zhang   PetscInt       NM;
6929496c9aeSAmlan Barua   ptrdiff_t      temp;
6939496c9aeSAmlan Barua #endif
6943c3a9c75SAmlan Barua 
6953564f093SHong Zhang   PetscFunctionBegin;
696ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
697f76f76beSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
698f76f76beSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
6990298fd71SBarry Smith   ierr = VecGetOwnershipRange(y,&low,NULL);
7003c3a9c75SAmlan Barua 
7013564f093SHong Zhang   if (size==1) {
7028ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7038ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7049496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
705*9448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7066971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7076971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7086971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
709b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7103564f093SHong Zhang   } else {
7113c3a9c75SAmlan Barua     switch (ndim) {
7123c3a9c75SAmlan Barua     case 1:
71364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7147a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
71526fbe8dcSKarl Rupp 
7164f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
7174f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
718*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
71964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
72164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
72264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
72364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
72464657d84SAmlan Barua #else
725e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
72664657d84SAmlan Barua #endif
7273c3a9c75SAmlan Barua       break;
7283c3a9c75SAmlan Barua     case 2:
729bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7307a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
73126fbe8dcSKarl Rupp 
7324f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
7334f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
734*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
735bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
736bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
737bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
738bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
739bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
740bd59e6c6SAmlan Barua #else
741c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
74226fbe8dcSKarl Rupp 
743854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
744854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
7453c3a9c75SAmlan Barua 
7463564f093SHong Zhang       if (dim[1]%2==0) {
7473c3a9c75SAmlan Barua         NM = dim[1]+2;
7483564f093SHong Zhang       } else {
7493c3a9c75SAmlan Barua         NM = dim[1]+1;
7503564f093SHong Zhang       }
7513c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
7523c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
7533c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
7543c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
75526fbe8dcSKarl Rupp 
7565b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
7573c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
7583c3a9c75SAmlan Barua         }
7593c3a9c75SAmlan Barua       }
7603c3a9c75SAmlan Barua 
7613c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
7623c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
7633c3a9c75SAmlan Barua 
764*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
765f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
766f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
767f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
768b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
769b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
770b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
771b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
772bd59e6c6SAmlan Barua #endif
7739496c9aeSAmlan Barua       break;
7743c3a9c75SAmlan Barua 
7753c3a9c75SAmlan Barua     case 3:
776bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7777a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
77826fbe8dcSKarl Rupp 
7794f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
7804f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
781*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
782bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
783bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
784bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
785bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
786bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
787bd59e6c6SAmlan Barua #else
788f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
789f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
7907a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
79126fbe8dcSKarl Rupp 
792854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
793854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
79451d1eed7SAmlan Barua 
795db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
796db4deed7SKarl Rupp       else             NM = dim[2]+1;
79751d1eed7SAmlan Barua 
79851d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
79951d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
80051d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
80151d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
80251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
80326fbe8dcSKarl Rupp 
80451d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
80551d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
80651d1eed7SAmlan Barua           }
80751d1eed7SAmlan Barua         }
80851d1eed7SAmlan Barua       }
80951d1eed7SAmlan Barua 
81051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
81151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
812*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
81351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
816b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
817b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
818b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
819b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
820bd59e6c6SAmlan Barua #endif
8219496c9aeSAmlan Barua       break;
8223c3a9c75SAmlan Barua 
8233c3a9c75SAmlan Barua     default:
824bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8257a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
82626fbe8dcSKarl Rupp 
8274f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
8284f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
829*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
830bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
833bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
834bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
835bd59e6c6SAmlan Barua #else
836f1ade23cSHong Zhang       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
837f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
838e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
83926fbe8dcSKarl Rupp 
840e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
84126fbe8dcSKarl Rupp 
8427a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
84326fbe8dcSKarl Rupp 
844e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
845e5d7f247SAmlan Barua 
846e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
847e5d7f247SAmlan Barua 
848854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
849854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
850e5d7f247SAmlan Barua 
851db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
852db4deed7SKarl Rupp       else                  NM = 1;
853e5d7f247SAmlan Barua 
8546971673cSAmlan Barua       j = low;
8553564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
8566971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
8576971673cSAmlan Barua         indx2[i] = j;
85826fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
8596971673cSAmlan Barua         j++;
8606971673cSAmlan Barua       }
8616971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8626971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
863*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
8646971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8656971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
8666971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
867b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
868b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
869b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
870b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
871bd59e6c6SAmlan Barua #endif
8729496c9aeSAmlan Barua       break;
8733c3a9c75SAmlan Barua     }
874e81bb599SAmlan Barua   }
8753564f093SHong Zhang   PetscFunctionReturn(0);
8763c3a9c75SAmlan Barua }
8773c3a9c75SAmlan Barua 
8783564f093SHong Zhang /*@
8793564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
8803564f093SHong Zhang 
8813564f093SHong Zhang    Collective on Mat
8823564f093SHong Zhang 
8833564f093SHong Zhang     Input Parameters:
8843564f093SHong Zhang +   A - FFTW matrix
8853564f093SHong Zhang -   x - FFTW vector
8863564f093SHong Zhang 
8873564f093SHong Zhang    Output Parameters:
8883564f093SHong Zhang .  y - PETSc vector
8893564f093SHong Zhang 
8903564f093SHong Zhang    Level: intermediate
8913564f093SHong Zhang 
8923564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
8933564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
8943564f093SHong Zhang 
8953564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
8963564f093SHong Zhang @*/
89774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
8983c3a9c75SAmlan Barua {
8993c3a9c75SAmlan Barua   PetscErrorCode ierr;
9005fd66863SKarl Rupp 
9013c3a9c75SAmlan Barua   PetscFunctionBegin;
902163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9033c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9043c3a9c75SAmlan Barua }
90554efbe56SHong Zhang 
90674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9075b3e41ffSAmlan Barua {
9085b3e41ffSAmlan Barua   PetscErrorCode ierr;
909ce94432eSBarry Smith   MPI_Comm       comm;
9105b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9115b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9129496c9aeSAmlan Barua   PetscInt       N     = fft->N;
913b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9149496c9aeSAmlan Barua   PetscInt       low;
9157a21806cSHong Zhang   PetscMPIInt    rank,size;
9167a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9179496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9185b3e41ffSAmlan Barua   VecScatter     vecscat;
9195b3e41ffSAmlan Barua   IS             list1,list2;
9209496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9219496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
9229496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
923a31b9140SHong Zhang   PetscInt       NM;
9249496c9aeSAmlan Barua   ptrdiff_t      temp;
9259496c9aeSAmlan Barua #endif
9269496c9aeSAmlan Barua 
9273564f093SHong Zhang   PetscFunctionBegin;
928ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
9295b3e41ffSAmlan Barua   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
9305b3e41ffSAmlan Barua   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
9310298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9325b3e41ffSAmlan Barua 
933e81bb599SAmlan Barua   if (size==1) {
934b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
935*9448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9366971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9376971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9386971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
939b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
940e81bb599SAmlan Barua 
9413564f093SHong Zhang   } else {
942e81bb599SAmlan Barua     switch (ndim) {
943e81bb599SAmlan Barua     case 1:
94464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9457a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
94626fbe8dcSKarl Rupp 
9474f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
9484f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
949*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
95064657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95164657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
95264657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
95364657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
95464657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
95564657d84SAmlan Barua #else
9566a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
95764657d84SAmlan Barua #endif
9585b3e41ffSAmlan Barua       break;
9595b3e41ffSAmlan Barua     case 2:
960bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9617a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
96226fbe8dcSKarl Rupp 
9634f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
9644f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
965*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
966bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
969bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
970bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
971bd59e6c6SAmlan Barua #else
9727a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
97326fbe8dcSKarl Rupp 
974854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
975854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
9765b3e41ffSAmlan Barua 
977db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
978db4deed7SKarl Rupp       else             NM = dim[1]+1;
9795b3e41ffSAmlan Barua 
9805b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
9815b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
9825b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
9835b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
98426fbe8dcSKarl Rupp 
9855b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
9865b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
9875b3e41ffSAmlan Barua         }
9885b3e41ffSAmlan Barua       }
9895b3e41ffSAmlan Barua 
9905b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9915b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9925b3e41ffSAmlan Barua 
993*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
9945b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9955b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9965b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
997b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
998b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
999b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1000b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1001bd59e6c6SAmlan Barua #endif
10029496c9aeSAmlan Barua       break;
10035b3e41ffSAmlan Barua     case 3:
1004bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10057a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
100626fbe8dcSKarl Rupp 
10074f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
10084f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1009*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1010bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1013bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1014bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1015bd59e6c6SAmlan Barua #else
10167a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
101726fbe8dcSKarl Rupp 
1018854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1019854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
102051d1eed7SAmlan Barua 
1021db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1022db4deed7SKarl Rupp       else             NM = dim[2]+1;
102351d1eed7SAmlan Barua 
102451d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
102551d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
102651d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
102751d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
102851d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
102926fbe8dcSKarl Rupp 
103051d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
103151d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
103251d1eed7SAmlan Barua           }
103351d1eed7SAmlan Barua         }
103451d1eed7SAmlan Barua       }
103551d1eed7SAmlan Barua 
103651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
103751d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
103851d1eed7SAmlan Barua 
1039*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
104051d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104151d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
104251d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1043b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1044b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1045b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1046b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1047bd59e6c6SAmlan Barua #endif
10489496c9aeSAmlan Barua       break;
10495b3e41ffSAmlan Barua     default:
1050bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10517a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
105226fbe8dcSKarl Rupp 
10534f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
10544f8276c3SHong Zhang       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1055*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1056bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1059bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1060bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1061bd59e6c6SAmlan Barua #else
1062ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
106326fbe8dcSKarl Rupp 
1064ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
106526fbe8dcSKarl Rupp 
1066c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
106726fbe8dcSKarl Rupp 
1068ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1069ba6e06d0SAmlan Barua 
1070ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1071ba6e06d0SAmlan Barua 
1072854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1073854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1074ba6e06d0SAmlan Barua 
1075db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1076db4deed7SKarl Rupp       else                  NM = 1;
1077ba6e06d0SAmlan Barua 
1078ba6e06d0SAmlan Barua       j = low;
10793564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1080ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1081ba6e06d0SAmlan Barua         indx2[i] = j;
10823564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1083ba6e06d0SAmlan Barua         j++;
1084ba6e06d0SAmlan Barua       }
1085ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1086ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1087ba6e06d0SAmlan Barua 
1088*9448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1089ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1092b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1093b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1094b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1095b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1096bd59e6c6SAmlan Barua #endif
10979496c9aeSAmlan Barua       break;
10985b3e41ffSAmlan Barua     }
1099e81bb599SAmlan Barua   }
11003564f093SHong Zhang   PetscFunctionReturn(0);
11015b3e41ffSAmlan Barua }
11025b3e41ffSAmlan Barua 
11033c3a9c75SAmlan Barua /*
11043564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11053564f093SHong Zhang 
11063c3a9c75SAmlan Barua   Options Database Keys:
11073c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11083c3a9c75SAmlan Barua 
11093c3a9c75SAmlan Barua    Level: intermediate
11103c3a9c75SAmlan Barua 
11113c3a9c75SAmlan Barua */
11128cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1113b2b77a04SHong Zhang {
1114b2b77a04SHong Zhang   PetscErrorCode ierr;
1115ce94432eSBarry Smith   MPI_Comm       comm;
1116b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1117b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1118b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11195274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11205274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1121b2b77a04SHong Zhang   PetscBool      flg;
11224f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
112311902ff2SHong Zhang   PetscMPIInt    size,rank;
11249496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11259496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11269496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11279496c9aeSAmlan Barua   ptrdiff_t      temp;
11288ca4c034SAmlan Barua   PetscInt       N1,tot_dim;
11294f48bc67SAmlan Barua #else
11304f48bc67SAmlan Barua   PetscInt       n1;
11319496c9aeSAmlan Barua #endif
11329496c9aeSAmlan Barua 
1133b2b77a04SHong Zhang   PetscFunctionBegin;
1134ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1135b2b77a04SHong Zhang   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
113611902ff2SHong Zhang   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1137c92418ccSAmlan Barua 
11381878d478SAmlan Barua   fftw_mpi_init();
113911902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
114011902ff2SHong Zhang   pdim[0] = dim[0];
11418ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11428ca4c034SAmlan Barua   tot_dim = 2*dim[0];
11438ca4c034SAmlan Barua #endif
11443564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
11456882372aSHong Zhang     partial_dim *= dim[ctr];
114611902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
11478ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1148db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1149db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
11508ca4c034SAmlan Barua #endif
11516882372aSHong Zhang   }
11523c3a9c75SAmlan Barua 
1153b2b77a04SHong Zhang   if (size == 1) {
1154e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1155b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1156b2b77a04SHong Zhang     n    = N;
1157e81bb599SAmlan Barua #else
1158e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1159e81bb599SAmlan Barua     n    = tot_dim;
1160e81bb599SAmlan Barua #endif
1161e81bb599SAmlan Barua 
1162b2b77a04SHong Zhang   } else {
11637a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1164b2b77a04SHong Zhang     switch (ndim) {
1165b2b77a04SHong Zhang     case 1:
11663c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11673c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1168e5d7f247SAmlan Barua #else
11697a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
117026fbe8dcSKarl Rupp 
11716882372aSHong Zhang       n    = (PetscInt)local_n0;
11729496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
11739496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1174e5d7f247SAmlan Barua #endif
1175b2b77a04SHong Zhang       break;
1176b2b77a04SHong Zhang     case 2:
11775b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
11787a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1179b2b77a04SHong Zhang       /*
1180b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
11810ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1182b2b77a04SHong Zhang        */
1183b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1184b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
11855b3e41ffSAmlan Barua #else
1186c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
118726fbe8dcSKarl Rupp 
11885b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1189c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
11905b3e41ffSAmlan Barua #endif
1191b2b77a04SHong Zhang       break;
1192b2b77a04SHong Zhang     case 3:
119351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11947a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
119526fbe8dcSKarl Rupp 
11966882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
11976882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
119851d1eed7SAmlan Barua #else
1199c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
120026fbe8dcSKarl Rupp 
120151d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1202c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
120351d1eed7SAmlan Barua #endif
1204b2b77a04SHong Zhang       break;
1205b2b77a04SHong Zhang     default:
1206b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12077a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
120826fbe8dcSKarl Rupp 
12096882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12106882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1211b3a17365SAmlan Barua #else
1212b3a17365SAmlan Barua       temp = pdim[ndim-1];
121326fbe8dcSKarl Rupp 
1214b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
121526fbe8dcSKarl Rupp 
1216c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
121726fbe8dcSKarl Rupp 
1218b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1219b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
122026fbe8dcSKarl Rupp 
1221b3a17365SAmlan Barua       pdim[ndim-1] = temp;
122226fbe8dcSKarl Rupp 
1223c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1224b3a17365SAmlan Barua #endif
1225b2b77a04SHong Zhang       break;
1226b2b77a04SHong Zhang     }
1227b2b77a04SHong Zhang   }
12282277172eSMark Adams   free(pdim);
1229b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1230b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1231b2b77a04SHong Zhang   fft->data = (void*)fftw;
1232b2b77a04SHong Zhang 
1233b2b77a04SHong Zhang   fft->n            = n;
12340c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1235e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
123626fbe8dcSKarl Rupp 
12375e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12388c1d5ab3SHong Zhang   if (size == 1) {
1239a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1240a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1241a1b6d50cSHong Zhang #else
12428c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1243a1b6d50cSHong Zhang #endif
12448c1d5ab3SHong Zhang   }
124526fbe8dcSKarl Rupp 
1246b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1247c92418ccSAmlan Barua 
1248b2b77a04SHong Zhang   fftw->p_forward  = 0;
1249b2b77a04SHong Zhang   fftw->p_backward = 0;
1250b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1251b2b77a04SHong Zhang 
1252b2b77a04SHong Zhang   if (size == 1) {
1253b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1254b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1255b2b77a04SHong Zhang   } else {
1256b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1257b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1258b2b77a04SHong Zhang   }
1259b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1260b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12614a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
126226fbe8dcSKarl Rupp 
12632a7a6963SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1264bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1265bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1266b2b77a04SHong Zhang 
1267b2b77a04SHong Zhang   /* get runtime options */
1268ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
12695274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
12705274ed1bSHong Zhang   if (flg) {
12715274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
12725274ed1bSHong Zhang   }
12734a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1274b2b77a04SHong Zhang   PetscFunctionReturn(0);
1275b2b77a04SHong Zhang }
12763c3a9c75SAmlan Barua 
12773c3a9c75SAmlan Barua 
12783c3a9c75SAmlan Barua 
12793c3a9c75SAmlan Barua 
1280