xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 2e956fe4fc852fabc23b437482e1fb7b77fddb0d)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10c6db04a5SJed Brown #include <fftw3-mpi.h>
110cf2e031SBarry Smith #else
120cf2e031SBarry Smith #include <fftw3.h>
130cf2e031SBarry Smith #endif
14b2b77a04SHong Zhang EXTERN_C_END
15b2b77a04SHong Zhang 
16b2b77a04SHong Zhang typedef struct {
17b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim   *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
27b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
28b2b77a04SHong Zhang } Mat_FFTW;
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
380cf2e031SBarry Smith #endif
39b2b77a04SHong Zhang 
400cf2e031SBarry Smith /*
410cf2e031SBarry Smith    MatMult_SeqFFTW performs forward DFT
4297504071SAmlan Barua    Input parameter:
433564f093SHong Zhang      A - the matrix
4497504071SAmlan Barua      x - the vector on which FDFT will be performed
4597504071SAmlan Barua 
4697504071SAmlan Barua    Output parameter:
4797504071SAmlan Barua      y - vector that stores result of FDFT
4897504071SAmlan Barua */
49b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
50b2b77a04SHong Zhang {
51b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
52b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
53f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
54f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
551acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
56a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
57a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
58a1b6d50cSHong Zhang #else
598c1d5ab3SHong Zhang   fftw_iodim     *iodims;
60a1b6d50cSHong Zhang #endif
611acd80e4SHong Zhang   PetscInt       i;
621acd80e4SHong Zhang #endif
631acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
64b2b77a04SHong Zhang 
65b2b77a04SHong Zhang   PetscFunctionBegin;
669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&x_array));
679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&y_array));
686aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
69b2b77a04SHong Zhang     switch (ndim) {
70b2b77a04SHong Zhang     case 1:
7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
72b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7358a912c5SAmlan Barua #else
7458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7558a912c5SAmlan Barua #endif
76b2b77a04SHong Zhang       break;
77b2b77a04SHong Zhang     case 2:
7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
79b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8058a912c5SAmlan Barua #else
8158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8258a912c5SAmlan Barua #endif
83b2b77a04SHong Zhang       break;
84b2b77a04SHong Zhang     case 3:
8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
86b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8758a912c5SAmlan Barua #else
8858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8958a912c5SAmlan Barua #endif
90b2b77a04SHong Zhang       break;
91b2b77a04SHong Zhang     default:
9258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
93a1b6d50cSHong Zhang       iodims = fftw->iodims;
94a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
958c1d5ab3SHong Zhang       if (ndim) {
96a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
978c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
988c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
99a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
1008c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
1018c1d5ab3SHong Zhang         }
1028c1d5ab3SHong Zhang       }
103a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
104a1b6d50cSHong Zhang #else
105a1b6d50cSHong Zhang       if (ndim) {
106a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
107a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
108a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
109a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
110a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
111a1b6d50cSHong Zhang         }
112a1b6d50cSHong Zhang       }
113a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
114a1b6d50cSHong Zhang #endif
115a1b6d50cSHong Zhang 
11658a912c5SAmlan Barua #else
117a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11858a912c5SAmlan Barua #endif
119b2b77a04SHong Zhang       break;
120b2b77a04SHong Zhang     }
121f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
122b2b77a04SHong Zhang     fftw->foutarray = y_array;
123b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
124b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
125b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
126b2b77a04SHong Zhang   } else { /* use existing plan */
127b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1287e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
129b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1307e4bc134SDominic Meiser #else
1317e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1327e4bc134SDominic Meiser #endif
133b2b77a04SHong Zhang     } else {
134b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
135b2b77a04SHong Zhang     }
136b2b77a04SHong Zhang   }
1379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&y_array));
1389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&x_array));
139b2b77a04SHong Zhang   PetscFunctionReturn(0);
140b2b77a04SHong Zhang }
141b2b77a04SHong Zhang 
14297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
14397504071SAmlan Barua    Input parameter:
1443564f093SHong Zhang      A - the matrix
14597504071SAmlan Barua      x - the vector on which BDFT will be performed
14697504071SAmlan Barua 
14797504071SAmlan Barua    Output parameter:
14897504071SAmlan Barua      y - vector that stores result of BDFT
14997504071SAmlan Barua */
15097504071SAmlan Barua 
151b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
152b2b77a04SHong Zhang {
153b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
154b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
155f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
156f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
157b2b77a04SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
1581acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
159a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
160a1b6d50cSHong Zhang   fftw_iodim64   *iodims = fftw->iodims;
161a1b6d50cSHong Zhang #else
162a1b6d50cSHong Zhang   fftw_iodim     *iodims = fftw->iodims;
163a1b6d50cSHong Zhang #endif
1641acd80e4SHong Zhang #endif
165b2b77a04SHong Zhang 
166b2b77a04SHong Zhang   PetscFunctionBegin;
1679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&x_array));
1689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&y_array));
1696aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
170b2b77a04SHong Zhang     switch (ndim) {
171b2b77a04SHong Zhang     case 1:
17258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
173b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17458a912c5SAmlan Barua #else
175e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17658a912c5SAmlan Barua #endif
177b2b77a04SHong Zhang       break;
178b2b77a04SHong Zhang     case 2:
17958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
180b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18158a912c5SAmlan Barua #else
182e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18358a912c5SAmlan Barua #endif
184b2b77a04SHong Zhang       break;
185b2b77a04SHong Zhang     case 3:
18658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
187b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18858a912c5SAmlan Barua #else
189e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19058a912c5SAmlan Barua #endif
191b2b77a04SHong Zhang       break;
192b2b77a04SHong Zhang     default:
19358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
194a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
195a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
196a1b6d50cSHong Zhang #else
1978c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
198a1b6d50cSHong Zhang #endif
19958a912c5SAmlan Barua #else
200a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
20158a912c5SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     }
204f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
205b2b77a04SHong Zhang     fftw->boutarray = y_array;
2062f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
207b2b77a04SHong Zhang   } else { /* use existing plan */
208b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2097e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
210b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2117e4bc134SDominic Meiser #else
2127e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2137e4bc134SDominic Meiser #endif
214b2b77a04SHong Zhang     } else {
2152f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
216b2b77a04SHong Zhang     }
217b2b77a04SHong Zhang   }
2189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&y_array));
2199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&x_array));
220b2b77a04SHong Zhang   PetscFunctionReturn(0);
221b2b77a04SHong Zhang }
222b2b77a04SHong Zhang 
2230cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
22497504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
22597504071SAmlan Barua    Input parameter:
2263564f093SHong Zhang      A - the matrix
22797504071SAmlan Barua      x - the vector on which FDFT will be performed
22897504071SAmlan Barua 
22997504071SAmlan Barua    Output parameter:
23097504071SAmlan Barua    y   - vector that stores result of FDFT
23197504071SAmlan Barua */
232b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
233b2b77a04SHong Zhang {
234b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
235b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
236f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
237f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
238c92418ccSAmlan Barua   PetscInt       ndim = fft->ndim,*dim = fft->dim;
239ce94432eSBarry Smith   MPI_Comm       comm;
240b2b77a04SHong Zhang 
241b2b77a04SHong Zhang   PetscFunctionBegin;
2429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&x_array));
2449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&y_array));
2456aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
246b2b77a04SHong Zhang     switch (ndim) {
247b2b77a04SHong Zhang     case 1:
2483c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
249b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
250ae0a50aaSHong Zhang #else
2514f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2523c3a9c75SAmlan Barua #endif
253b2b77a04SHong Zhang       break;
254b2b77a04SHong Zhang     case 2:
25597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2573c3a9c75SAmlan Barua #else
2583c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2593c3a9c75SAmlan Barua #endif
260b2b77a04SHong Zhang       break;
261b2b77a04SHong Zhang     case 3:
2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
263b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2643c3a9c75SAmlan Barua #else
26551d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2663c3a9c75SAmlan Barua #endif
267b2b77a04SHong Zhang       break;
268b2b77a04SHong Zhang     default:
2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
270c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2713c3a9c75SAmlan Barua #else
2723c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2733c3a9c75SAmlan Barua #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     }
276f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
277b2b77a04SHong Zhang     fftw->foutarray = y_array;
278b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
279b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
281b2b77a04SHong Zhang   } else { /* use existing plan */
282b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
284b2b77a04SHong Zhang     } else {
285b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
286b2b77a04SHong Zhang     }
287b2b77a04SHong Zhang   }
2889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&y_array));
2899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&x_array));
290b2b77a04SHong Zhang   PetscFunctionReturn(0);
291b2b77a04SHong Zhang }
292b2b77a04SHong Zhang 
2930cf2e031SBarry Smith /*
2940cf2e031SBarry Smith    MatMultTranspose_MPIFFTW performs parallel backward DFT
29597504071SAmlan Barua    Input parameter:
2963564f093SHong Zhang      A - the matrix
29797504071SAmlan Barua      x - the vector on which BDFT will be performed
29897504071SAmlan Barua 
29997504071SAmlan Barua    Output parameter:
30097504071SAmlan Barua      y - vector that stores result of BDFT
30197504071SAmlan Barua */
302b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
303b2b77a04SHong Zhang {
304b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
305b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
306f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
307f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
308c92418ccSAmlan Barua   PetscInt       ndim = fft->ndim,*dim = fft->dim;
309ce94432eSBarry Smith   MPI_Comm       comm;
310b2b77a04SHong Zhang 
311b2b77a04SHong Zhang   PetscFunctionBegin;
3129566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
3139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x,&x_array));
3149566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y,&y_array));
3156aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
316b2b77a04SHong Zhang     switch (ndim) {
317b2b77a04SHong Zhang     case 1:
3183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
319b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
320ae0a50aaSHong Zhang #else
3214f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3223c3a9c75SAmlan Barua #endif
323b2b77a04SHong Zhang       break;
324b2b77a04SHong Zhang     case 2:
32597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
326b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3273c3a9c75SAmlan Barua #else
3283c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3293c3a9c75SAmlan Barua #endif
330b2b77a04SHong Zhang       break;
331b2b77a04SHong Zhang     case 3:
3323c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
333b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3343c3a9c75SAmlan Barua #else
3353c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3363c3a9c75SAmlan Barua #endif
337b2b77a04SHong Zhang       break;
338b2b77a04SHong Zhang     default:
3393c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
340c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3413c3a9c75SAmlan Barua #else
3423c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3433c3a9c75SAmlan Barua #endif
344b2b77a04SHong Zhang       break;
345b2b77a04SHong Zhang     }
346f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
347b2b77a04SHong Zhang     fftw->boutarray = y_array;
3482f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
349b2b77a04SHong Zhang   } else { /* use existing plan */
350b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
351b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
352b2b77a04SHong Zhang     } else {
3532f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
354b2b77a04SHong Zhang     }
355b2b77a04SHong Zhang   }
3569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y,&y_array));
3579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x,&x_array));
358b2b77a04SHong Zhang   PetscFunctionReturn(0);
359b2b77a04SHong Zhang }
3600cf2e031SBarry Smith #endif
361b2b77a04SHong Zhang 
362b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
363b2b77a04SHong Zhang {
364b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
365b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
366b2b77a04SHong Zhang 
367b2b77a04SHong Zhang   PetscFunctionBegin;
368b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
369b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3708c1d5ab3SHong Zhang   if (fftw->iodims) {
3718c1d5ab3SHong Zhang     free(fftw->iodims);
3728c1d5ab3SHong Zhang   }
3739566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3749566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
375*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",NULL));
376*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",NULL));
377*2e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",NULL));
3780cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3796ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3800cf2e031SBarry Smith #endif
381b2b77a04SHong Zhang   PetscFunctionReturn(0);
382b2b77a04SHong Zhang }
383b2b77a04SHong Zhang 
3840cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
385c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
386b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
387b2b77a04SHong Zhang {
388b2b77a04SHong Zhang   PetscScalar    *array;
389b2b77a04SHong Zhang 
390b2b77a04SHong Zhang   PetscFunctionBegin;
3919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v,&array));
3922f613bf5SBarry Smith   fftw_free((fftw_complex*)array);
3939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v,&array));
3949566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
395b2b77a04SHong Zhang   PetscFunctionReturn(0);
396b2b77a04SHong Zhang }
3970cf2e031SBarry Smith #endif
398b2b77a04SHong Zhang 
3990cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
4005b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
4015b113e21Ss-sajid-ali {
4025b113e21Ss-sajid-ali   Mat            A;
4035b113e21Ss-sajid-ali 
4045b113e21Ss-sajid-ali   PetscFunctionBegin;
4059566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A));
4069566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL));
4075b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4085b113e21Ss-sajid-ali }
4095b113e21Ss-sajid-ali 
4105b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
4115b113e21Ss-sajid-ali {
4125b113e21Ss-sajid-ali   Mat            A;
4135b113e21Ss-sajid-ali 
4145b113e21Ss-sajid-ali   PetscFunctionBegin;
4159566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A));
4169566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL));
4175b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4185b113e21Ss-sajid-ali }
4195b113e21Ss-sajid-ali 
4205b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
4215b113e21Ss-sajid-ali {
4225b113e21Ss-sajid-ali   Mat            A;
4235b113e21Ss-sajid-ali 
4245b113e21Ss-sajid-ali   PetscFunctionBegin;
4259566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A));
4269566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new));
4275b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4285b113e21Ss-sajid-ali }
4290cf2e031SBarry Smith #endif
4305b113e21Ss-sajid-ali 
4314be45526SHong Zhang /*@
4322a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
4334f7415efSAmlan Barua      parallel layout determined by FFTW
4344f7415efSAmlan Barua 
4354f7415efSAmlan Barua    Collective on Mat
4364f7415efSAmlan Barua 
4374f7415efSAmlan Barua    Input Parameter:
4383564f093SHong Zhang .   A - the matrix
4394f7415efSAmlan Barua 
440d8d19677SJose E. Roman    Output Parameters:
441cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4426b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
443cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4444f7415efSAmlan Barua 
4454f7415efSAmlan Barua   Level: advanced
4463564f093SHong Zhang 
44797504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
44897504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
44997504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
45097504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
45197504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45297504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
453e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
454e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
455e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
456e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
457e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
458e0ec536eSAmlan Barua         each processor and returns that.
4594f7415efSAmlan Barua 
460db781477SPatrick Sanan .seealso: `MatCreateFFT()`
4614be45526SHong Zhang @*/
4622a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4634be45526SHong Zhang {
4644be45526SHong Zhang   PetscFunctionBegin;
465cac4c232SBarry Smith   PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
4664be45526SHong Zhang   PetscFunctionReturn(0);
4674be45526SHong Zhang }
4684be45526SHong Zhang 
4692a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4704f7415efSAmlan Barua {
4714f7415efSAmlan Barua   PetscMPIInt    size,rank;
472ce94432eSBarry Smith   MPI_Comm       comm;
4734f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4744f7415efSAmlan Barua 
4754f7415efSAmlan Barua   PetscFunctionBegin;
4764f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4774f7415efSAmlan Barua   PetscValidType(A,1);
4789566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
4794f7415efSAmlan Barua 
4809566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4824f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4834f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4849566063dSJacob Faibussowitsch     if (fin)  PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin));
4859566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout));
4869566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout));
4874f7415efSAmlan Barua #else
4889566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin));
4899566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout));
4909566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout));
4914f7415efSAmlan Barua #endif
4920cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
49397504071SAmlan Barua   } else { /* parallel cases */
4940cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW*)fft->data;
4950cf2e031SBarry Smith     PetscInt     ndim  = fft->ndim,*dim = fft->dim;
4964f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4979496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4989496c9aeSAmlan Barua     fftw_complex *data_fout;
4999496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
5009496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
5019496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
5029496c9aeSAmlan Barua #else
5034f7415efSAmlan Barua     double       *data_finr,*data_boutr;
5046ccf0b3eSHong Zhang     PetscInt     n1,N1;
5059496c9aeSAmlan Barua     ptrdiff_t    temp;
5069496c9aeSAmlan Barua #endif
5079496c9aeSAmlan Barua 
5084f7415efSAmlan Barua     switch (ndim) {
5094f7415efSAmlan Barua     case 1:
51057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5114f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
51257625b84SAmlan Barua #else
51357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
51457625b84SAmlan Barua       if (fin) {
51557625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5169566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin));
5179566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
5185b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
51957625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
52057625b84SAmlan Barua       }
52157625b84SAmlan Barua       if (fout) {
52257625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout));
5249566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
5255b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
52657625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52757625b84SAmlan Barua       }
52857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
52957625b84SAmlan Barua       if (bout) {
53057625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5319566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout));
5329566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
5335b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
53457625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
53557625b84SAmlan Barua       }
53657625b84SAmlan Barua       break;
53757625b84SAmlan Barua #endif
5384f7415efSAmlan Barua     case 2:
53997504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5404f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5414f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5424f7415efSAmlan Barua       if (fin) {
5434f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
5449566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
5459566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
5465b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5474f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5484f7415efSAmlan Barua       }
54957625b84SAmlan Barua       if (fout) {
55057625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5519566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout));
5529566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
5535b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
55457625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
55557625b84SAmlan Barua       }
5565b113e21Ss-sajid-ali       if (bout) {
5575b113e21Ss-sajid-ali         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
5589566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
5599566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
5605b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5615b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5625b113e21Ss-sajid-ali       }
5634f7415efSAmlan Barua #else
5644f7415efSAmlan Barua       /* Get local size */
5654f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5664f7415efSAmlan Barua       if (fin) {
5674f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5689566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
5699566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
5705b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5714f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5724f7415efSAmlan Barua       }
5734f7415efSAmlan Barua       if (fout) {
5744f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5759566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
5769566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
5775b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5784f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5794f7415efSAmlan Barua       }
5804f7415efSAmlan Barua       if (bout) {
5814f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5829566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
5839566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
5845b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5854f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5864f7415efSAmlan Barua       }
5874f7415efSAmlan Barua #endif
5884f7415efSAmlan Barua       break;
5894f7415efSAmlan Barua     case 3:
5904f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5914f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5924f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5934f7415efSAmlan Barua       if (fin) {
5944f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
5959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
5969566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
5975b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5984f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5994f7415efSAmlan Barua       }
6005b113e21Ss-sajid-ali       if (fout) {
6015b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6029566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout));
6039566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
6045b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6055b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6065b113e21Ss-sajid-ali       }
6074f7415efSAmlan Barua       if (bout) {
6084f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
6099566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
6109566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
6115b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6124f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6134f7415efSAmlan Barua       }
6144f7415efSAmlan Barua #else
6150c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
6160c9b18e4SAmlan Barua       if (fin) {
6170c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6189566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
6199566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
6205b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6210c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6220c9b18e4SAmlan Barua       }
6230c9b18e4SAmlan Barua       if (fout) {
6240c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6259566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
6269566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
6275b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6280c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6290c9b18e4SAmlan Barua       }
6300c9b18e4SAmlan Barua       if (bout) {
6310c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6329566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
6339566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
6345b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6350c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6360c9b18e4SAmlan Barua       }
6374f7415efSAmlan Barua #endif
6384f7415efSAmlan Barua       break;
6394f7415efSAmlan Barua     default:
6404f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6414f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
6424f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
6434f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6440cf2e031SBarry Smith       N1          = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
6454f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
6464f7415efSAmlan Barua 
6474f7415efSAmlan Barua       if (fin) {
6484f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
6499566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin));
6509566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
6515b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6524f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6534f7415efSAmlan Barua       }
6545b113e21Ss-sajid-ali       if (fout) {
6555b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6569566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout));
6579566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
6585b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6595b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6605b113e21Ss-sajid-ali       }
6614f7415efSAmlan Barua       if (bout) {
6624f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
6639566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout));
6649566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
6655b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6669496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6674f7415efSAmlan Barua       }
6684f7415efSAmlan Barua #else
6690c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6700c9b18e4SAmlan Barua       if (fin) {
6710c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6729566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
6739566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
6745b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6750c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6760c9b18e4SAmlan Barua       }
6770c9b18e4SAmlan Barua       if (fout) {
6780c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6799566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
6809566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
6815b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6820c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6830c9b18e4SAmlan Barua       }
6840c9b18e4SAmlan Barua       if (bout) {
6850c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6869566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
6879566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
6885b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6890c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6900c9b18e4SAmlan Barua       }
6914f7415efSAmlan Barua #endif
6924f7415efSAmlan Barua       break;
6934f7415efSAmlan Barua     }
694f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
695f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
696f9d91177SJunchao Zhang        memory leaks. We void these pointers here to avoid acident uses.
697f9d91177SJunchao Zhang     */
698f9d91177SJunchao Zhang     if (fin)  (*fin)->ops->replacearray = NULL;
699f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
700f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
7010cf2e031SBarry Smith #endif
7024f7415efSAmlan Barua   }
7034f7415efSAmlan Barua   PetscFunctionReturn(0);
7044f7415efSAmlan Barua }
7054f7415efSAmlan Barua 
7063564f093SHong Zhang /*@
7073564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
70854efbe56SHong Zhang 
7093564f093SHong Zhang    Collective on Mat
7103564f093SHong Zhang 
7113564f093SHong Zhang    Input Parameters:
7123564f093SHong Zhang +  A - FFTW matrix
7133564f093SHong Zhang -  x - the PETSc vector
7143564f093SHong Zhang 
7153564f093SHong Zhang    Output Parameters:
7163564f093SHong Zhang .  y - the FFTW vector
7173564f093SHong Zhang 
718b2b77a04SHong Zhang   Options Database Keys:
7193564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
720b2b77a04SHong Zhang 
721b2b77a04SHong Zhang    Level: intermediate
7223564f093SHong Zhang 
72397504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
72497504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
72597504071SAmlan Barua          zeros. This routine does that job by scattering operation.
72697504071SAmlan Barua 
727db781477SPatrick Sanan .seealso: `VecScatterFFTWToPetsc()`
7283564f093SHong Zhang @*/
7293564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
7303564f093SHong Zhang {
7313564f093SHong Zhang   PetscFunctionBegin;
732cac4c232SBarry Smith   PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
7333564f093SHong Zhang   PetscFunctionReturn(0);
7343564f093SHong Zhang }
7353c3a9c75SAmlan Barua 
73674a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
7373c3a9c75SAmlan Barua {
738ce94432eSBarry Smith   MPI_Comm       comm;
7393c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
7409496c9aeSAmlan Barua   PetscInt       low;
7417a21806cSHong Zhang   PetscMPIInt    rank,size;
7427a21806cSHong Zhang   PetscInt       vsize,vsize1;
7433c3a9c75SAmlan Barua   VecScatter     vecscat;
7440cf2e031SBarry Smith   IS             list1;
7453c3a9c75SAmlan Barua 
7463564f093SHong Zhang   PetscFunctionBegin;
7479566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
7489566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7499566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7509566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y,&low,NULL));
7513c3a9c75SAmlan Barua 
7523564f093SHong Zhang   if (size==1) {
7539566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x,&vsize));
7549566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y,&vsize1));
7559566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1));
7569566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
7579566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
7589566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
7599566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7609566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7610cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7623564f093SHong Zhang   } else {
7630cf2e031SBarry Smith     Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
7640cf2e031SBarry Smith     PetscInt   ndim  = fft->ndim,*dim = fft->dim;
7650cf2e031SBarry Smith     ptrdiff_t  local_n0,local_0_start;
7660cf2e031SBarry Smith     ptrdiff_t  local_n1,local_1_start;
7670cf2e031SBarry Smith     IS         list2;
7680cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
7690cf2e031SBarry Smith     PetscInt   i,j,k,partial_dim;
7700cf2e031SBarry Smith     PetscInt   *indx1, *indx2, tempindx, tempindx1;
7710cf2e031SBarry Smith     PetscInt   NM;
7720cf2e031SBarry Smith     ptrdiff_t  temp;
7730cf2e031SBarry Smith #endif
7740cf2e031SBarry Smith 
7753c3a9c75SAmlan Barua     switch (ndim) {
7763c3a9c75SAmlan Barua     case 1:
77764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7787a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
77926fbe8dcSKarl Rupp 
7809566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1));
7819566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0,low,1,&list2));
7829566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
7839566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
7849566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
7859566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7869566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7879566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
78864657d84SAmlan Barua #else
789e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
79064657d84SAmlan Barua #endif
7913c3a9c75SAmlan Barua       break;
7923c3a9c75SAmlan Barua     case 2:
793bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7947a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
79526fbe8dcSKarl Rupp 
7969566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
7979566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
7989566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
7999566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8009566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8019566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8029566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
804bd59e6c6SAmlan Barua #else
805c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
80626fbe8dcSKarl Rupp 
8079566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
8089566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
8093c3a9c75SAmlan Barua 
8103564f093SHong Zhang       if (dim[1]%2==0) {
8113c3a9c75SAmlan Barua         NM = dim[1]+2;
8123564f093SHong Zhang       } else {
8133c3a9c75SAmlan Barua         NM = dim[1]+1;
8143564f093SHong Zhang       }
8153c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
8163c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
8173c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
8183c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
81926fbe8dcSKarl Rupp 
8205b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
8213c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
8223c3a9c75SAmlan Barua         }
8233c3a9c75SAmlan Barua       }
8243c3a9c75SAmlan Barua 
8259566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
8269566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
8273c3a9c75SAmlan Barua 
8289566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
8299566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8309566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8319566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8329566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8339566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8349566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8359566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
836bd59e6c6SAmlan Barua #endif
8379496c9aeSAmlan Barua       break;
8383c3a9c75SAmlan Barua 
8393c3a9c75SAmlan Barua     case 3:
840bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8417a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
84226fbe8dcSKarl Rupp 
8439566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
8449566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
8459566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
8469566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8479566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8489566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8499566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8509566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
851bd59e6c6SAmlan Barua #else
852c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
853f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
8547a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
85526fbe8dcSKarl Rupp 
8569566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
8579566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
85851d1eed7SAmlan Barua 
859db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
860db4deed7SKarl Rupp       else             NM = dim[2]+1;
86151d1eed7SAmlan Barua 
86251d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
86351d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
86451d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
86551d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
86651d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
86726fbe8dcSKarl Rupp 
86851d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
86951d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
87051d1eed7SAmlan Barua           }
87151d1eed7SAmlan Barua         }
87251d1eed7SAmlan Barua       }
87351d1eed7SAmlan Barua 
8749566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
8759566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
8769566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
8779566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8789566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8799566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8809566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8819566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8829566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8839566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
884bd59e6c6SAmlan Barua #endif
8859496c9aeSAmlan Barua       break;
8863c3a9c75SAmlan Barua 
8873c3a9c75SAmlan Barua     default:
888bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8897a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
89026fbe8dcSKarl Rupp 
8919566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
8929566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
8939566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
8949566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8959566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
8969566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8979566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8989566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
899bd59e6c6SAmlan Barua #else
900c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
901f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
902e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
90326fbe8dcSKarl Rupp 
904e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
90526fbe8dcSKarl Rupp 
9067a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
90726fbe8dcSKarl Rupp 
908e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
909e5d7f247SAmlan Barua 
910e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
911e5d7f247SAmlan Barua 
9129566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
9139566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
914e5d7f247SAmlan Barua 
915db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
916db4deed7SKarl Rupp       else                  NM = 1;
917e5d7f247SAmlan Barua 
9186971673cSAmlan Barua       j = low;
9193564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
9206971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
9216971673cSAmlan Barua         indx2[i] = j;
92226fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
9236971673cSAmlan Barua         j++;
9246971673cSAmlan Barua       }
9259566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
9269566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
9279566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
9289566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
9299566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
9309566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9319566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9329566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9339566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9349566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
935bd59e6c6SAmlan Barua #endif
9369496c9aeSAmlan Barua       break;
9373c3a9c75SAmlan Barua     }
9380cf2e031SBarry Smith #endif
939e81bb599SAmlan Barua   }
9403564f093SHong Zhang   PetscFunctionReturn(0);
9413c3a9c75SAmlan Barua }
9423c3a9c75SAmlan Barua 
9433564f093SHong Zhang /*@
9443564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
9453564f093SHong Zhang 
9463564f093SHong Zhang    Collective on Mat
9473564f093SHong Zhang 
9483564f093SHong Zhang     Input Parameters:
9493564f093SHong Zhang +   A - FFTW matrix
9503564f093SHong Zhang -   x - FFTW vector
9513564f093SHong Zhang 
9523564f093SHong Zhang    Output Parameters:
9533564f093SHong Zhang .  y - PETSc vector
9543564f093SHong Zhang 
9553564f093SHong Zhang    Level: intermediate
9563564f093SHong Zhang 
9573564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9583564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9593564f093SHong Zhang 
960db781477SPatrick Sanan .seealso: `VecScatterPetscToFFTW()`
9613564f093SHong Zhang @*/
96274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9633c3a9c75SAmlan Barua {
9643c3a9c75SAmlan Barua   PetscFunctionBegin;
965cac4c232SBarry Smith   PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
9663c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9673c3a9c75SAmlan Barua }
96854efbe56SHong Zhang 
96974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9705b3e41ffSAmlan Barua {
971ce94432eSBarry Smith   MPI_Comm       comm;
9725b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9739496c9aeSAmlan Barua   PetscInt       low;
9747a21806cSHong Zhang   PetscMPIInt    rank,size;
9755b3e41ffSAmlan Barua   VecScatter     vecscat;
9760cf2e031SBarry Smith   IS             list1;
9779496c9aeSAmlan Barua 
9783564f093SHong Zhang   PetscFunctionBegin;
9799566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
9809566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9829566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x,&low,NULL));
9835b3e41ffSAmlan Barua 
984e81bb599SAmlan Barua   if (size==1) {
9859566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm,fft->N,0,1,&list1));
9869566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
9879566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
9889566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
9899566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9909566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
991e81bb599SAmlan Barua 
9920cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9933564f093SHong Zhang   } else {
9940cf2e031SBarry Smith     Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9950cf2e031SBarry Smith     PetscInt       ndim  = fft->ndim,*dim = fft->dim;
9960cf2e031SBarry Smith     ptrdiff_t      local_n0,local_0_start;
9970cf2e031SBarry Smith     ptrdiff_t      local_n1,local_1_start;
9980cf2e031SBarry Smith     IS             list2;
9990cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
10000cf2e031SBarry Smith     PetscInt       i,j,k,partial_dim;
10010cf2e031SBarry Smith     PetscInt       *indx1, *indx2, tempindx, tempindx1;
10020cf2e031SBarry Smith     PetscInt       NM;
10030cf2e031SBarry Smith     ptrdiff_t      temp;
10040cf2e031SBarry Smith #endif
1005e81bb599SAmlan Barua     switch (ndim) {
1006e81bb599SAmlan Barua     case 1:
100764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10087a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
100926fbe8dcSKarl Rupp 
10109566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1));
10119566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n1,low,1,&list2));
10129566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
10139566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10149566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10159566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10169566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10179566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
101864657d84SAmlan Barua #else
10196a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
102064657d84SAmlan Barua #endif
10215b3e41ffSAmlan Barua       break;
10225b3e41ffSAmlan Barua     case 2:
1023bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10247a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
102526fbe8dcSKarl Rupp 
10269566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
10279566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
10289566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
10299566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10309566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10319566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10329566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10339566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1034bd59e6c6SAmlan Barua #else
10357a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
103626fbe8dcSKarl Rupp 
10379566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
10389566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
10395b3e41ffSAmlan Barua 
1040db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
1041db4deed7SKarl Rupp       else             NM = dim[1]+1;
10425b3e41ffSAmlan Barua 
10435b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
10445b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
10455b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
10465b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
104726fbe8dcSKarl Rupp 
10485b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10495b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10505b3e41ffSAmlan Barua         }
10515b3e41ffSAmlan Barua       }
10525b3e41ffSAmlan Barua 
10539566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
10549566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
10555b3e41ffSAmlan Barua 
10569566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
10579566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10589566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10599566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10609566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10619566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10629566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10639566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1064bd59e6c6SAmlan Barua #endif
10659496c9aeSAmlan Barua       break;
10665b3e41ffSAmlan Barua     case 3:
1067bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10687a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
106926fbe8dcSKarl Rupp 
10709566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
10719566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
10729566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
10739566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10749566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
10759566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10769566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10779566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1078bd59e6c6SAmlan Barua #else
10797a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
108026fbe8dcSKarl Rupp 
10819566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
10829566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
108351d1eed7SAmlan Barua 
1084db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1085db4deed7SKarl Rupp       else             NM = dim[2]+1;
108651d1eed7SAmlan Barua 
108751d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
108851d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
108951d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
109051d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
109151d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
109226fbe8dcSKarl Rupp 
109351d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
109451d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
109551d1eed7SAmlan Barua           }
109651d1eed7SAmlan Barua         }
109751d1eed7SAmlan Barua       }
109851d1eed7SAmlan Barua 
10999566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
11009566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
110151d1eed7SAmlan Barua 
11029566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
11039566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11049566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11059566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11069566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11079566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11089566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11099566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1110bd59e6c6SAmlan Barua #endif
11119496c9aeSAmlan Barua       break;
11125b3e41ffSAmlan Barua     default:
1113bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11147a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
111526fbe8dcSKarl Rupp 
11169566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
11179566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
11189566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
11199566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11209566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11219566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11229566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11239566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1124bd59e6c6SAmlan Barua #else
1125ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
112626fbe8dcSKarl Rupp 
1127ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
112826fbe8dcSKarl Rupp 
1129c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
113026fbe8dcSKarl Rupp 
1131ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1132ba6e06d0SAmlan Barua 
1133ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1134ba6e06d0SAmlan Barua 
11359566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
11369566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
1137ba6e06d0SAmlan Barua 
1138db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1139db4deed7SKarl Rupp       else                  NM = 1;
1140ba6e06d0SAmlan Barua 
1141ba6e06d0SAmlan Barua       j = low;
11423564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1143ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1144ba6e06d0SAmlan Barua         indx2[i] = j;
11453564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1146ba6e06d0SAmlan Barua         j++;
1147ba6e06d0SAmlan Barua       }
11489566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
11499566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
1150ba6e06d0SAmlan Barua 
11519566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
11529566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11539566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
11549566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11559566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11569566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11579566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11589566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1159bd59e6c6SAmlan Barua #endif
11609496c9aeSAmlan Barua       break;
11615b3e41ffSAmlan Barua     }
11620cf2e031SBarry Smith #endif
1163e81bb599SAmlan Barua   }
11643564f093SHong Zhang   PetscFunctionReturn(0);
11655b3e41ffSAmlan Barua }
11665b3e41ffSAmlan Barua 
11673c3a9c75SAmlan Barua /*
11683564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11693564f093SHong Zhang 
11703c3a9c75SAmlan Barua   Options Database Keys:
11713c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11723c3a9c75SAmlan Barua 
11733c3a9c75SAmlan Barua    Level: intermediate
11743c3a9c75SAmlan Barua 
11753c3a9c75SAmlan Barua */
11768cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1177b2b77a04SHong Zhang {
1178ce94432eSBarry Smith   MPI_Comm       comm;
1179b2b77a04SHong Zhang   Mat_FFT        *fft = (Mat_FFT*)A->data;
1180b2b77a04SHong Zhang   Mat_FFTW       *fftw;
11810cf2e031SBarry Smith   PetscInt       ndim = fft->ndim,*dim = fft->dim;
11825274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11835274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1184b2b77a04SHong Zhang   PetscBool      flg;
11854f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
118611902ff2SHong Zhang   PetscMPIInt    size,rank;
11879496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11890cf2e031SBarry Smith   PetscInt       tot_dim;
11909496c9aeSAmlan Barua #endif
11919496c9aeSAmlan Barua 
1192b2b77a04SHong Zhang   PetscFunctionBegin;
11939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
11949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1196c92418ccSAmlan Barua 
11970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11981878d478SAmlan Barua   fftw_mpi_init();
11990cf2e031SBarry Smith #endif
120011902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
120111902ff2SHong Zhang   pdim[0] = dim[0];
12028ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12038ca4c034SAmlan Barua   tot_dim = 2*dim[0];
12048ca4c034SAmlan Barua #endif
12053564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
12066882372aSHong Zhang     partial_dim *= dim[ctr];
120711902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
12088ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1209db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1210db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
12118ca4c034SAmlan Barua #endif
12126882372aSHong Zhang   }
12133c3a9c75SAmlan Barua 
1214b2b77a04SHong Zhang   if (size == 1) {
1215e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12169566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N));
12170cf2e031SBarry Smith     fft->n = fft->N;
1218e81bb599SAmlan Barua #else
12199566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim));
12200cf2e031SBarry Smith     fft->n = tot_dim;
12210cf2e031SBarry Smith #endif
12220cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12230cf2e031SBarry Smith   } else {
12240cf2e031SBarry Smith     ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
12250cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
12260cf2e031SBarry Smith     ptrdiff_t temp;
12270cf2e031SBarry Smith     PetscInt  N1;
1228e81bb599SAmlan Barua #endif
1229e81bb599SAmlan Barua 
1230b2b77a04SHong Zhang     switch (ndim) {
1231b2b77a04SHong Zhang     case 1:
12323c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12333c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1234e5d7f247SAmlan Barua #else
12357a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
12360cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12379566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N));
1238e5d7f247SAmlan Barua #endif
1239b2b77a04SHong Zhang       break;
1240b2b77a04SHong Zhang     case 2:
12415b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12427a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
12430cf2e031SBarry Smith       fft->n    = (PetscInt)local_n0*dim[1];
12449566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
12455b3e41ffSAmlan Barua #else
1246c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
124726fbe8dcSKarl Rupp 
12480cf2e031SBarry Smith       fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
12499566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)));
12505b3e41ffSAmlan Barua #endif
1251b2b77a04SHong Zhang       break;
1252b2b77a04SHong Zhang     case 3:
125351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12547a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
125526fbe8dcSKarl Rupp 
12560cf2e031SBarry Smith       fft->n = (PetscInt)local_n0*dim[1]*dim[2];
12579566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
125851d1eed7SAmlan Barua #else
1259c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
126026fbe8dcSKarl Rupp 
12610cf2e031SBarry Smith       fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
12629566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)));
126351d1eed7SAmlan Barua #endif
1264b2b77a04SHong Zhang       break;
1265b2b77a04SHong Zhang     default:
1266b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12677a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
126826fbe8dcSKarl Rupp 
12690cf2e031SBarry Smith       fft->n = (PetscInt)local_n0*partial_dim;
12709566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1271b3a17365SAmlan Barua #else
1272b3a17365SAmlan Barua       temp = pdim[ndim-1];
127326fbe8dcSKarl Rupp 
1274b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
127526fbe8dcSKarl Rupp 
1276c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
127726fbe8dcSKarl Rupp 
12780cf2e031SBarry Smith       fft->n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
12790cf2e031SBarry Smith       N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
128026fbe8dcSKarl Rupp 
1281b3a17365SAmlan Barua       pdim[ndim-1] = temp;
128226fbe8dcSKarl Rupp 
12839566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1));
1284b3a17365SAmlan Barua #endif
1285b2b77a04SHong Zhang       break;
1286b2b77a04SHong Zhang     }
12870cf2e031SBarry Smith #endif
1288b2b77a04SHong Zhang   }
12892277172eSMark Adams   free(pdim);
12909566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW));
12919566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(A,&fftw));
1292b2b77a04SHong Zhang   fft->data = (void*)fftw;
1293b2b77a04SHong Zhang 
12940c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1295e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
129626fbe8dcSKarl Rupp 
12979566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
12988c1d5ab3SHong Zhang   if (size == 1) {
1299a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1300a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1301a1b6d50cSHong Zhang #else
13028c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1303a1b6d50cSHong Zhang #endif
13048c1d5ab3SHong Zhang   }
130526fbe8dcSKarl Rupp 
1306b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1307c92418ccSAmlan Barua 
1308f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1309f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1310b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1311b2b77a04SHong Zhang 
1312b2b77a04SHong Zhang   if (size == 1) {
1313b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1314b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13150cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1316b2b77a04SHong Zhang   } else {
1317b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1318b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13190cf2e031SBarry Smith #endif
1320b2b77a04SHong Zhang   }
1321b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1322b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13234a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
132426fbe8dcSKarl Rupp 
13259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW));
13269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW));
13279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW));
1328b2b77a04SHong Zhang 
1329b2b77a04SHong Zhang   /* get runtime options */
1330d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
13319566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg));
13325f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1333d0609cedSBarry Smith   PetscOptionsEnd();
1334b2b77a04SHong Zhang   PetscFunctionReturn(0);
1335b2b77a04SHong Zhang }
1336