xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10c6db04a5SJed Brown #include <fftw3-mpi.h>
110cf2e031SBarry Smith #else
120cf2e031SBarry Smith #include <fftw3.h>
130cf2e031SBarry Smith #endif
14b2b77a04SHong Zhang EXTERN_C_END
15b2b77a04SHong Zhang 
16b2b77a04SHong Zhang typedef struct {
17b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26b2b77a04SHong Zhang   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays becaue fftw plan should be
27b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
28b2b77a04SHong Zhang } Mat_FFTW;
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
380cf2e031SBarry Smith #endif
39b2b77a04SHong Zhang 
400cf2e031SBarry Smith /*
410cf2e031SBarry Smith    MatMult_SeqFFTW performs forward DFT
4297504071SAmlan Barua    Input parameter:
433564f093SHong Zhang      A - the matrix
4497504071SAmlan Barua      x - the vector on which FDFT will be performed
4597504071SAmlan Barua 
4697504071SAmlan Barua    Output parameter:
4797504071SAmlan Barua      y - vector that stores result of FDFT
4897504071SAmlan Barua */
499371c9d4SSatish Balay PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) {
50b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
51b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
52f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
53f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
541acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
55a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
56a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
57a1b6d50cSHong Zhang #else
588c1d5ab3SHong Zhang   fftw_iodim *iodims;
59a1b6d50cSHong Zhang #endif
601acd80e4SHong Zhang   PetscInt i;
611acd80e4SHong Zhang #endif
621acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
63b2b77a04SHong Zhang 
64b2b77a04SHong Zhang   PetscFunctionBegin;
659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
676aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
68b2b77a04SHong Zhang     switch (ndim) {
69b2b77a04SHong Zhang     case 1:
7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
71b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7258a912c5SAmlan Barua #else
7358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7458a912c5SAmlan Barua #endif
75b2b77a04SHong Zhang       break;
76b2b77a04SHong Zhang     case 2:
7758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
78b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7958a912c5SAmlan Barua #else
8058a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8158a912c5SAmlan Barua #endif
82b2b77a04SHong Zhang       break;
83b2b77a04SHong Zhang     case 3:
8458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
85b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
8658a912c5SAmlan Barua #else
8758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8858a912c5SAmlan Barua #endif
89b2b77a04SHong Zhang       break;
90b2b77a04SHong Zhang     default:
9158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
92a1b6d50cSHong Zhang       iodims = fftw->iodims;
93a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
948c1d5ab3SHong Zhang       if (ndim) {
95a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
968c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
978c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
98a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
998c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
1008c1d5ab3SHong Zhang         }
1018c1d5ab3SHong Zhang       }
102a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103a1b6d50cSHong Zhang #else
104a1b6d50cSHong Zhang       if (ndim) {
105a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
106a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
107a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
108a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
109a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
110a1b6d50cSHong Zhang         }
111a1b6d50cSHong Zhang       }
112a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
113a1b6d50cSHong Zhang #endif
114a1b6d50cSHong Zhang 
11558a912c5SAmlan Barua #else
116a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
11758a912c5SAmlan Barua #endif
118b2b77a04SHong Zhang       break;
119b2b77a04SHong Zhang     }
120f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
121b2b77a04SHong Zhang     fftw->foutarray = y_array;
122b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
123b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
124b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
125b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
126b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1277e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
128b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1297e4bc134SDominic Meiser #else
1307e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1317e4bc134SDominic Meiser #endif
132b2b77a04SHong Zhang     } else {
133b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
134b2b77a04SHong Zhang     }
135b2b77a04SHong Zhang   }
1369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
138b2b77a04SHong Zhang   PetscFunctionReturn(0);
139b2b77a04SHong Zhang }
140b2b77a04SHong Zhang 
14197504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
14297504071SAmlan Barua    Input parameter:
1433564f093SHong Zhang      A - the matrix
14497504071SAmlan Barua      x - the vector on which BDFT will be performed
14597504071SAmlan Barua 
14697504071SAmlan Barua    Output parameter:
14797504071SAmlan Barua      y - vector that stores result of BDFT
14897504071SAmlan Barua */
14997504071SAmlan Barua 
1509371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) {
151b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
152b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
153f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
154f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
155b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
1561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
157a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
158a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
159a1b6d50cSHong Zhang #else
160a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
161a1b6d50cSHong Zhang #endif
1621acd80e4SHong Zhang #endif
163b2b77a04SHong Zhang 
164b2b77a04SHong Zhang   PetscFunctionBegin;
1659566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
1669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1676aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
168b2b77a04SHong Zhang     switch (ndim) {
169b2b77a04SHong Zhang     case 1:
17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17258a912c5SAmlan Barua #else
173e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17458a912c5SAmlan Barua #endif
175b2b77a04SHong Zhang       break;
176b2b77a04SHong Zhang     case 2:
17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17958a912c5SAmlan Barua #else
180e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18158a912c5SAmlan Barua #endif
182b2b77a04SHong Zhang       break;
183b2b77a04SHong Zhang     case 3:
18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
185b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
18658a912c5SAmlan Barua #else
187e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18858a912c5SAmlan Barua #endif
189b2b77a04SHong Zhang       break;
190b2b77a04SHong Zhang     default:
19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
193a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
194a1b6d50cSHong Zhang #else
1958c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196a1b6d50cSHong Zhang #endif
19758a912c5SAmlan Barua #else
198a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19958a912c5SAmlan Barua #endif
200b2b77a04SHong Zhang       break;
201b2b77a04SHong Zhang     }
202f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
203b2b77a04SHong Zhang     fftw->boutarray = y_array;
2042f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
205b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
206b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2077e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
208b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2097e4bc134SDominic Meiser #else
2107e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2117e4bc134SDominic Meiser #endif
212b2b77a04SHong Zhang     } else {
2132f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
214b2b77a04SHong Zhang     }
215b2b77a04SHong Zhang   }
2169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
218b2b77a04SHong Zhang   PetscFunctionReturn(0);
219b2b77a04SHong Zhang }
220b2b77a04SHong Zhang 
2210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
22297504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
22397504071SAmlan Barua    Input parameter:
2243564f093SHong Zhang      A - the matrix
22597504071SAmlan Barua      x - the vector on which FDFT will be performed
22697504071SAmlan Barua 
22797504071SAmlan Barua    Output parameter:
22897504071SAmlan Barua    y   - vector that stores result of FDFT
22997504071SAmlan Barua */
2309371c9d4SSatish Balay PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) {
231b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
232b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
233f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
234f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
235c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
236ce94432eSBarry Smith   MPI_Comm           comm;
237b2b77a04SHong Zhang 
238b2b77a04SHong Zhang   PetscFunctionBegin;
2399566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2409566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2419566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2426aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
243b2b77a04SHong Zhang     switch (ndim) {
244b2b77a04SHong Zhang     case 1:
2453c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
246b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
247ae0a50aaSHong Zhang #else
2484f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2493c3a9c75SAmlan Barua #endif
250b2b77a04SHong Zhang       break;
251b2b77a04SHong Zhang     case 2:
25297504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
253b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2543c3a9c75SAmlan Barua #else
2553c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2563c3a9c75SAmlan Barua #endif
257b2b77a04SHong Zhang       break;
258b2b77a04SHong Zhang     case 3:
2593c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
260b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2613c3a9c75SAmlan Barua #else
26251d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2633c3a9c75SAmlan Barua #endif
264b2b77a04SHong Zhang       break;
265b2b77a04SHong Zhang     default:
2663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
267c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2683c3a9c75SAmlan Barua #else
2693c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2703c3a9c75SAmlan Barua #endif
271b2b77a04SHong Zhang       break;
272b2b77a04SHong Zhang     }
273f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
274b2b77a04SHong Zhang     fftw->foutarray = y_array;
275b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
276b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
277b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
278b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
279b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
280b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
281b2b77a04SHong Zhang     } else {
282b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
283b2b77a04SHong Zhang     }
284b2b77a04SHong Zhang   }
2859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
287b2b77a04SHong Zhang   PetscFunctionReturn(0);
288b2b77a04SHong Zhang }
289b2b77a04SHong Zhang 
2900cf2e031SBarry Smith /*
2910cf2e031SBarry Smith    MatMultTranspose_MPIFFTW performs parallel backward DFT
29297504071SAmlan Barua    Input parameter:
2933564f093SHong Zhang      A - the matrix
29497504071SAmlan Barua      x - the vector on which BDFT will be performed
29597504071SAmlan Barua 
29697504071SAmlan Barua    Output parameter:
29797504071SAmlan Barua      y - vector that stores result of BDFT
29897504071SAmlan Barua */
2999371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) {
300b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
301b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
302f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
303f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
304c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
305ce94432eSBarry Smith   MPI_Comm           comm;
306b2b77a04SHong Zhang 
307b2b77a04SHong Zhang   PetscFunctionBegin;
3089566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
3109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
3116aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
312b2b77a04SHong Zhang     switch (ndim) {
313b2b77a04SHong Zhang     case 1:
3143c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
315b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316ae0a50aaSHong Zhang #else
3174f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3183c3a9c75SAmlan Barua #endif
319b2b77a04SHong Zhang       break;
320b2b77a04SHong Zhang     case 2:
32197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3233c3a9c75SAmlan Barua #else
3243c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3253c3a9c75SAmlan Barua #endif
326b2b77a04SHong Zhang       break;
327b2b77a04SHong Zhang     case 3:
3283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
329b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3303c3a9c75SAmlan Barua #else
3313c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3323c3a9c75SAmlan Barua #endif
333b2b77a04SHong Zhang       break;
334b2b77a04SHong Zhang     default:
3353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
336c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3373c3a9c75SAmlan Barua #else
3383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3393c3a9c75SAmlan Barua #endif
340b2b77a04SHong Zhang       break;
341b2b77a04SHong Zhang     }
342f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
343b2b77a04SHong Zhang     fftw->boutarray = y_array;
3442f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
345b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
346b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
347b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
348b2b77a04SHong Zhang     } else {
3492f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
350b2b77a04SHong Zhang     }
351b2b77a04SHong Zhang   }
3529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3539566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
354b2b77a04SHong Zhang   PetscFunctionReturn(0);
355b2b77a04SHong Zhang }
3560cf2e031SBarry Smith #endif
357b2b77a04SHong Zhang 
3589371c9d4SSatish Balay PetscErrorCode MatDestroy_FFTW(Mat A) {
359b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
360b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
361b2b77a04SHong Zhang 
362b2b77a04SHong Zhang   PetscFunctionBegin;
363b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
364b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
365ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3669566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3679566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3682e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3692e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3702e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3710cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3726ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3730cf2e031SBarry Smith #endif
374b2b77a04SHong Zhang   PetscFunctionReturn(0);
375b2b77a04SHong Zhang }
376b2b77a04SHong Zhang 
3770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
378c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
3799371c9d4SSatish Balay PetscErrorCode VecDestroy_MPIFFTW(Vec v) {
380b2b77a04SHong Zhang   PetscScalar *array;
381b2b77a04SHong Zhang 
382b2b77a04SHong Zhang   PetscFunctionBegin;
3839566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3842f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3869566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
387b2b77a04SHong Zhang   PetscFunctionReturn(0);
388b2b77a04SHong Zhang }
3890cf2e031SBarry Smith #endif
390b2b77a04SHong Zhang 
3910cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3929371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) {
3935b113e21Ss-sajid-ali   Mat A;
3945b113e21Ss-sajid-ali 
3955b113e21Ss-sajid-ali   PetscFunctionBegin;
3969566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
3979566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
3985b113e21Ss-sajid-ali   PetscFunctionReturn(0);
3995b113e21Ss-sajid-ali }
4005b113e21Ss-sajid-ali 
4019371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) {
4025b113e21Ss-sajid-ali   Mat A;
4035b113e21Ss-sajid-ali 
4045b113e21Ss-sajid-ali   PetscFunctionBegin;
4059566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4069566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4075b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4085b113e21Ss-sajid-ali }
4095b113e21Ss-sajid-ali 
4109371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) {
4115b113e21Ss-sajid-ali   Mat A;
4125b113e21Ss-sajid-ali 
4135b113e21Ss-sajid-ali   PetscFunctionBegin;
4149566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4159566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4165b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4175b113e21Ss-sajid-ali }
4180cf2e031SBarry Smith #endif
4195b113e21Ss-sajid-ali 
4204be45526SHong Zhang /*@
4212a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
42211a5261eSBarry Smith      parallel layout determined by `MATFFTW`
4234f7415efSAmlan Barua 
4244f7415efSAmlan Barua    Collective on Mat
4254f7415efSAmlan Barua 
4264f7415efSAmlan Barua    Input Parameter:
4273564f093SHong Zhang .   A - the matrix
4284f7415efSAmlan Barua 
429d8d19677SJose E. Roman    Output Parameters:
430cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4316b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
432cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4334f7415efSAmlan Barua 
4344f7415efSAmlan Barua   Level: advanced
4353564f093SHong Zhang 
43611a5261eSBarry Smith   Notes:
43711a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
43897504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
43997504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
44011a5261eSBarry Smith 
44111a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
44297504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
44397504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
444e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
445e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
446e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
44711a5261eSBarry Smith 
448e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
449e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
450e0ec536eSAmlan Barua   each processor and returns that.
4514f7415efSAmlan Barua 
45211a5261eSBarry Smith   Developer Note:
45311a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
45411a5261eSBarry Smith 
45511a5261eSBarry Smith .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4564be45526SHong Zhang @*/
4579371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) {
4584be45526SHong Zhang   PetscFunctionBegin;
459cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4604be45526SHong Zhang   PetscFunctionReturn(0);
4614be45526SHong Zhang }
4624be45526SHong Zhang 
4639371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) {
4644f7415efSAmlan Barua   PetscMPIInt size, rank;
465ce94432eSBarry Smith   MPI_Comm    comm;
4664f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4674f7415efSAmlan Barua 
4684f7415efSAmlan Barua   PetscFunctionBegin;
4694f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4704f7415efSAmlan Barua   PetscValidType(A, 1);
4719566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4724f7415efSAmlan Barua 
4739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4754f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4764f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4779566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4789566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4799566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4804f7415efSAmlan Barua #else
4819566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4829566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4839566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4844f7415efSAmlan Barua #endif
4850cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
48697504071SAmlan Barua   } else { /* parallel cases */
4870cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
4880cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
4894f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
4909496c9aeSAmlan Barua     ptrdiff_t     local_n1;
4919496c9aeSAmlan Barua     fftw_complex *data_fout;
4929496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
4939496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4949496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
4959496c9aeSAmlan Barua #else
4964f7415efSAmlan Barua     double   *data_finr, *data_boutr;
4976ccf0b3eSHong Zhang     PetscInt  n1, N1;
4989496c9aeSAmlan Barua     ptrdiff_t temp;
4999496c9aeSAmlan Barua #endif
5009496c9aeSAmlan Barua 
5014f7415efSAmlan Barua     switch (ndim) {
5024f7415efSAmlan Barua     case 1:
50357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5044f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
50557625b84SAmlan Barua #else
50657625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
50757625b84SAmlan Barua       if (fin) {
50857625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5099566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
5109566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5115b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
51257625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
51357625b84SAmlan Barua       }
51457625b84SAmlan Barua       if (fout) {
51557625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5169566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
5179566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5185b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
51957625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52057625b84SAmlan Barua       }
52157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52257625b84SAmlan Barua       if (bout) {
52357625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5249566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5259566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5265b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
52757625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
52857625b84SAmlan Barua       }
52957625b84SAmlan Barua       break;
53057625b84SAmlan Barua #endif
5314f7415efSAmlan Barua     case 2:
53297504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5334f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5349371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5359371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5364f7415efSAmlan Barua       if (fin) {
5374f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5389566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5399566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5405b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5414f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5424f7415efSAmlan Barua       }
54357625b84SAmlan Barua       if (fout) {
54457625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5459566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5469566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5475b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
54857625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
54957625b84SAmlan Barua       }
5505b113e21Ss-sajid-ali       if (bout) {
5515b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5529566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5539566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5545b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5555b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5565b113e21Ss-sajid-ali       }
5574f7415efSAmlan Barua #else
5584f7415efSAmlan Barua       /* Get local size */
5594f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5604f7415efSAmlan Barua       if (fin) {
5614f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5629566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5639566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5645b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5654f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5664f7415efSAmlan Barua       }
5674f7415efSAmlan Barua       if (fout) {
5684f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5699566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5709566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5715b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5724f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5734f7415efSAmlan Barua       }
5744f7415efSAmlan Barua       if (bout) {
5754f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5769566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5779566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5785b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5794f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5804f7415efSAmlan Barua       }
5814f7415efSAmlan Barua #endif
5824f7415efSAmlan Barua       break;
5834f7415efSAmlan Barua     case 3:
5844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5854f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5869371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
5879371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
5884f7415efSAmlan Barua       if (fin) {
5894f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5909566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5919566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5925b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5934f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua       }
5955b113e21Ss-sajid-ali       if (fout) {
5965b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5979566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
5989566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5995b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6005b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6015b113e21Ss-sajid-ali       }
6024f7415efSAmlan Barua       if (bout) {
6034f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6049566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6059566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6065b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6074f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6084f7415efSAmlan Barua       }
6094f7415efSAmlan Barua #else
6100c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6110c9b18e4SAmlan Barua       if (fin) {
6120c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6139566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6149566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6155b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6160c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6170c9b18e4SAmlan Barua       }
6180c9b18e4SAmlan Barua       if (fout) {
6190c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6209566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6219566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6225b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6230c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6240c9b18e4SAmlan Barua       }
6250c9b18e4SAmlan Barua       if (bout) {
6260c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6279566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6289566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6295b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6300c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6324f7415efSAmlan Barua #endif
6334f7415efSAmlan Barua       break;
6344f7415efSAmlan Barua     default:
6354f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6364f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6374f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6384f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6390cf2e031SBarry Smith       N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6404f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6414f7415efSAmlan Barua 
6424f7415efSAmlan Barua       if (fin) {
6434f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6449566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6459566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6465b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6474f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6484f7415efSAmlan Barua       }
6495b113e21Ss-sajid-ali       if (fout) {
6505b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6519566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6529566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6535b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6545b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6555b113e21Ss-sajid-ali       }
6564f7415efSAmlan Barua       if (bout) {
6574f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6589566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6599566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6605b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6619496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6624f7415efSAmlan Barua       }
6634f7415efSAmlan Barua #else
6640c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6650c9b18e4SAmlan Barua       if (fin) {
6660c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6679566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6689566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6695b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6700c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6710c9b18e4SAmlan Barua       }
6720c9b18e4SAmlan Barua       if (fout) {
6730c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6749566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6759566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6765b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6770c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6780c9b18e4SAmlan Barua       }
6790c9b18e4SAmlan Barua       if (bout) {
6800c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6819566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6829566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6835b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6840c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6850c9b18e4SAmlan Barua       }
6864f7415efSAmlan Barua #endif
6874f7415efSAmlan Barua       break;
6884f7415efSAmlan Barua     }
689f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
690f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
691f9d91177SJunchao Zhang        memory leaks. We void these pointers here to avoid acident uses.
692f9d91177SJunchao Zhang     */
693f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
694f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
695f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
6960cf2e031SBarry Smith #endif
6974f7415efSAmlan Barua   }
6984f7415efSAmlan Barua   PetscFunctionReturn(0);
6994f7415efSAmlan Barua }
7004f7415efSAmlan Barua 
7013564f093SHong Zhang /*@
70211a5261eSBarry Smith    VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
70354efbe56SHong Zhang 
7043564f093SHong Zhang    Collective on Mat
7053564f093SHong Zhang 
7063564f093SHong Zhang    Input Parameters:
7073564f093SHong Zhang +  A - FFTW matrix
7083564f093SHong Zhang -  x - the PETSc vector
7093564f093SHong Zhang 
7103564f093SHong Zhang    Output Parameters:
7113564f093SHong Zhang .  y - the FFTW vector
7123564f093SHong Zhang 
713b2b77a04SHong Zhang    Level: intermediate
7143564f093SHong Zhang 
71511a5261eSBarry Smith    Note:
71611a5261eSBarry Smith    For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
71797504071SAmlan Barua    one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
71897504071SAmlan Barua    zeros. This routine does that job by scattering operation.
71997504071SAmlan Barua 
72011a5261eSBarry Smith .seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7213564f093SHong Zhang @*/
7229371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) {
7233564f093SHong Zhang   PetscFunctionBegin;
724cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7253564f093SHong Zhang   PetscFunctionReturn(0);
7263564f093SHong Zhang }
7273c3a9c75SAmlan Barua 
7289371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) {
729ce94432eSBarry Smith   MPI_Comm    comm;
7303c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7319496c9aeSAmlan Barua   PetscInt    low;
7327a21806cSHong Zhang   PetscMPIInt rank, size;
7337a21806cSHong Zhang   PetscInt    vsize, vsize1;
7343c3a9c75SAmlan Barua   VecScatter  vecscat;
7350cf2e031SBarry Smith   IS          list1;
7363c3a9c75SAmlan Barua 
7373564f093SHong Zhang   PetscFunctionBegin;
7389566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7399566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7419566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7423c3a9c75SAmlan Barua 
7433564f093SHong Zhang   if (size == 1) {
7449566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7459566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7469566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7479566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7489566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7499566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7509566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7519566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7520cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7533564f093SHong Zhang   } else {
7540cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7550cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7560cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7570cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7580cf2e031SBarry Smith     IS        list2;
7590cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
7600cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7610cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7620cf2e031SBarry Smith     PetscInt  NM;
7630cf2e031SBarry Smith     ptrdiff_t temp;
7640cf2e031SBarry Smith #endif
7650cf2e031SBarry Smith 
7663c3a9c75SAmlan Barua     switch (ndim) {
7673c3a9c75SAmlan Barua     case 1:
76864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7697a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
77026fbe8dcSKarl Rupp 
7719566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7729566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7739566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7749566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7759566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7769566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7779566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7789566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
77964657d84SAmlan Barua #else
780e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
78164657d84SAmlan Barua #endif
7823c3a9c75SAmlan Barua       break;
7833c3a9c75SAmlan Barua     case 2:
784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7857a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
78626fbe8dcSKarl Rupp 
7879566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
7889566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
7899566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7909566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7919566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7929566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7939566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7949566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
795bd59e6c6SAmlan Barua #else
796c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
79726fbe8dcSKarl Rupp 
7989566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
7999566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
8003c3a9c75SAmlan Barua 
8013564f093SHong Zhang       if (dim[1] % 2 == 0) {
8023c3a9c75SAmlan Barua         NM = dim[1] + 2;
8033564f093SHong Zhang       } else {
8043c3a9c75SAmlan Barua         NM = dim[1] + 1;
8053564f093SHong Zhang       }
8063c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
8073c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
8083c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
8093c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
81026fbe8dcSKarl Rupp 
8115b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
8123c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
8133c3a9c75SAmlan Barua         }
8143c3a9c75SAmlan Barua       }
8153c3a9c75SAmlan Barua 
8169566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
8179566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
8183c3a9c75SAmlan Barua 
8199566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8209566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8219566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8229566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8239566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8249566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8259566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8269566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
827bd59e6c6SAmlan Barua #endif
8289496c9aeSAmlan Barua       break;
8293c3a9c75SAmlan Barua 
8303c3a9c75SAmlan Barua     case 3:
831bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8327a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
83326fbe8dcSKarl Rupp 
8349566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8359566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8369566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8379566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8389566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8399566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8409566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8419566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
842bd59e6c6SAmlan Barua #else
843c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
844f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8457a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
84626fbe8dcSKarl Rupp 
8479566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
8489566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
84951d1eed7SAmlan Barua 
850db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
851db4deed7SKarl Rupp       else NM = dim[2] + 1;
85251d1eed7SAmlan Barua 
85351d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
85451d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
85551d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
85651d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
85751d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
85826fbe8dcSKarl Rupp 
85951d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
86051d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
86151d1eed7SAmlan Barua           }
86251d1eed7SAmlan Barua         }
86351d1eed7SAmlan Barua       }
86451d1eed7SAmlan Barua 
8659566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8669566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8679566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8689566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8699566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8709566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8719566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8729566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8739566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8749566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
875bd59e6c6SAmlan Barua #endif
8769496c9aeSAmlan Barua       break;
8773c3a9c75SAmlan Barua 
8783c3a9c75SAmlan Barua     default:
879bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8807a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
88126fbe8dcSKarl Rupp 
8829566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
8839566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
8849566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8859566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8869566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8879566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8889566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8899566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
890bd59e6c6SAmlan Barua #else
891c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
892f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
893e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
89426fbe8dcSKarl Rupp 
895e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
89626fbe8dcSKarl Rupp 
8977a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
89826fbe8dcSKarl Rupp 
899e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
900e5d7f247SAmlan Barua 
901e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
902e5d7f247SAmlan Barua 
9039566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
9049566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
905e5d7f247SAmlan Barua 
906db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
907db4deed7SKarl Rupp       else NM = 1;
908e5d7f247SAmlan Barua 
9096971673cSAmlan Barua       j = low;
9103564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
9116971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
9126971673cSAmlan Barua         indx2[i] = j;
91326fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
9146971673cSAmlan Barua         j++;
9156971673cSAmlan Barua       }
9169566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9179566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9189566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9199566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9209566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9219566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9229566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9239566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9249566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9259566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
926bd59e6c6SAmlan Barua #endif
9279496c9aeSAmlan Barua       break;
9283c3a9c75SAmlan Barua     }
9290cf2e031SBarry Smith #endif
930e81bb599SAmlan Barua   }
9313564f093SHong Zhang   PetscFunctionReturn(0);
9323c3a9c75SAmlan Barua }
9333c3a9c75SAmlan Barua 
9343564f093SHong Zhang /*@
93511a5261eSBarry Smith    VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9363564f093SHong Zhang 
9373564f093SHong Zhang    Collective on Mat
9383564f093SHong Zhang 
9393564f093SHong Zhang     Input Parameters:
94011a5261eSBarry Smith +   A - `MATFFTW` matrix
9413564f093SHong Zhang -   x - FFTW vector
9423564f093SHong Zhang 
9433564f093SHong Zhang    Output Parameters:
9443564f093SHong Zhang .  y - PETSc vector
9453564f093SHong Zhang 
9463564f093SHong Zhang    Level: intermediate
9473564f093SHong Zhang 
94811a5261eSBarry Smith    Note:
94911a5261eSBarry Smith    While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
95011a5261eSBarry Smith    `VecScatterFFTWToPetsc()` removes those extra zeros.
9513564f093SHong Zhang 
95211a5261eSBarry Smith .seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9533564f093SHong Zhang @*/
9549371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) {
9553c3a9c75SAmlan Barua   PetscFunctionBegin;
956cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9573c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9583c3a9c75SAmlan Barua }
95954efbe56SHong Zhang 
9609371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) {
961ce94432eSBarry Smith   MPI_Comm    comm;
9625b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9639496c9aeSAmlan Barua   PetscInt    low;
9647a21806cSHong Zhang   PetscMPIInt rank, size;
9655b3e41ffSAmlan Barua   VecScatter  vecscat;
9660cf2e031SBarry Smith   IS          list1;
9679496c9aeSAmlan Barua 
9683564f093SHong Zhang   PetscFunctionBegin;
9699566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9709566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9719566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9729566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9735b3e41ffSAmlan Barua 
974e81bb599SAmlan Barua   if (size == 1) {
9759566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
9769566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
9779566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9789566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9799566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9809566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
981e81bb599SAmlan Barua 
9820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9833564f093SHong Zhang   } else {
9840cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
9850cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
9860cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
9870cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
9880cf2e031SBarry Smith     IS        list2;
9890cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
9900cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
9910cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
9920cf2e031SBarry Smith     PetscInt  NM;
9930cf2e031SBarry Smith     ptrdiff_t temp;
9940cf2e031SBarry Smith #endif
995e81bb599SAmlan Barua     switch (ndim) {
996e81bb599SAmlan Barua     case 1:
99764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9987a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
99926fbe8dcSKarl Rupp 
10009566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
10019566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
10029566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10039566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10049566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10059566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10069566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10079566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
100864657d84SAmlan Barua #else
10096a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
101064657d84SAmlan Barua #endif
10115b3e41ffSAmlan Barua       break;
10125b3e41ffSAmlan Barua     case 2:
1013bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10147a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
101526fbe8dcSKarl Rupp 
10169566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
10179566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10189566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10199566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10209566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10219566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10229566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10239566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1024bd59e6c6SAmlan Barua #else
10257a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
102626fbe8dcSKarl Rupp 
10279566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
10289566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
10295b3e41ffSAmlan Barua 
1030db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1031db4deed7SKarl Rupp       else NM = dim[1] + 1;
10325b3e41ffSAmlan Barua 
10335b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10345b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10355b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10365b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
103726fbe8dcSKarl Rupp 
10385b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10395b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10405b3e41ffSAmlan Barua         }
10415b3e41ffSAmlan Barua       }
10425b3e41ffSAmlan Barua 
10439566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10449566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10455b3e41ffSAmlan Barua 
10469566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10479566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10489566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10499566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10509566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10519566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10529566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10539566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1054bd59e6c6SAmlan Barua #endif
10559496c9aeSAmlan Barua       break;
10565b3e41ffSAmlan Barua     case 3:
1057bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10587a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
105926fbe8dcSKarl Rupp 
10609566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10619566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10629566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10639566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10649566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10659566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10669566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10679566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1068bd59e6c6SAmlan Barua #else
10697a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
107026fbe8dcSKarl Rupp 
10719566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
10729566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
107351d1eed7SAmlan Barua 
1074db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1075db4deed7SKarl Rupp       else NM = dim[2] + 1;
107651d1eed7SAmlan Barua 
107751d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
107851d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
107951d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
108051d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
108151d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
108226fbe8dcSKarl Rupp 
108351d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
108451d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
108551d1eed7SAmlan Barua           }
108651d1eed7SAmlan Barua         }
108751d1eed7SAmlan Barua       }
108851d1eed7SAmlan Barua 
10899566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
10909566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
109151d1eed7SAmlan Barua 
10929566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10939566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10949566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10959566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10969566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10979566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10989566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10999566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1100bd59e6c6SAmlan Barua #endif
11019496c9aeSAmlan Barua       break;
11025b3e41ffSAmlan Barua     default:
1103bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11047a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
110526fbe8dcSKarl Rupp 
11069566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
11079566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
11089566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11099566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11109566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11119566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11129566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11139566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1114bd59e6c6SAmlan Barua #else
1115ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
111626fbe8dcSKarl Rupp 
1117ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
111826fbe8dcSKarl Rupp 
1119c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
112026fbe8dcSKarl Rupp 
1121ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1122ba6e06d0SAmlan Barua 
1123ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1124ba6e06d0SAmlan Barua 
11259566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
11269566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1127ba6e06d0SAmlan Barua 
1128db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1129db4deed7SKarl Rupp       else NM = 1;
1130ba6e06d0SAmlan Barua 
1131ba6e06d0SAmlan Barua       j = low;
11323564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1133ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1134ba6e06d0SAmlan Barua         indx2[i] = j;
11353564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1136ba6e06d0SAmlan Barua         j++;
1137ba6e06d0SAmlan Barua       }
11389566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11399566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1140ba6e06d0SAmlan Barua 
11419566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11429566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11439566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11449566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11459566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11469566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11479566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11489566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1149bd59e6c6SAmlan Barua #endif
11509496c9aeSAmlan Barua       break;
11515b3e41ffSAmlan Barua     }
11520cf2e031SBarry Smith #endif
1153e81bb599SAmlan Barua   }
11543564f093SHong Zhang   PetscFunctionReturn(0);
11555b3e41ffSAmlan Barua }
11565b3e41ffSAmlan Barua 
11573c3a9c75SAmlan Barua /*
11583564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11593564f093SHong Zhang 
11603c3a9c75SAmlan Barua   Options Database Keys:
11613c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11623c3a9c75SAmlan Barua 
11633c3a9c75SAmlan Barua    Level: intermediate
11643c3a9c75SAmlan Barua 
11653c3a9c75SAmlan Barua */
11669371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) {
1167ce94432eSBarry Smith   MPI_Comm    comm;
1168b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1169b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11700cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11715274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11725274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1173b2b77a04SHong Zhang   PetscBool   flg;
11744f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
117511902ff2SHong Zhang   PetscMPIInt size, rank;
11769496c9aeSAmlan Barua   ptrdiff_t  *pdim;
11779496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11780cf2e031SBarry Smith   PetscInt tot_dim;
11799496c9aeSAmlan Barua #endif
11809496c9aeSAmlan Barua 
1181b2b77a04SHong Zhang   PetscFunctionBegin;
11829566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
11839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1185c92418ccSAmlan Barua 
11860cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11871878d478SAmlan Barua   fftw_mpi_init();
11880cf2e031SBarry Smith #endif
118911902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
119011902ff2SHong Zhang   pdim[0] = dim[0];
11918ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11928ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
11938ca4c034SAmlan Barua #endif
11943564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
11956882372aSHong Zhang     partial_dim *= dim[ctr];
119611902ff2SHong Zhang     pdim[ctr] = dim[ctr];
11978ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1198db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1199db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
12008ca4c034SAmlan Barua #endif
12016882372aSHong Zhang   }
12023c3a9c75SAmlan Barua 
1203b2b77a04SHong Zhang   if (size == 1) {
1204e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12059566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12060cf2e031SBarry Smith     fft->n = fft->N;
1207e81bb599SAmlan Barua #else
12089566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12090cf2e031SBarry Smith     fft->n       = tot_dim;
12100cf2e031SBarry Smith #endif
12110cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12120cf2e031SBarry Smith   } else {
12130cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12140cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
12150cf2e031SBarry Smith     ptrdiff_t temp;
12160cf2e031SBarry Smith     PetscInt  N1;
1217e81bb599SAmlan Barua #endif
1218e81bb599SAmlan Barua 
1219b2b77a04SHong Zhang     switch (ndim) {
1220b2b77a04SHong Zhang     case 1:
12213c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12223c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1223e5d7f247SAmlan Barua #else
12247a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12250cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12269566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1227e5d7f247SAmlan Barua #endif
1228b2b77a04SHong Zhang       break;
1229b2b77a04SHong Zhang     case 2:
12305b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12317a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12320cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12339566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12345b3e41ffSAmlan Barua #else
1235c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
123626fbe8dcSKarl Rupp 
12370cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12389566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12395b3e41ffSAmlan Barua #endif
1240b2b77a04SHong Zhang       break;
1241b2b77a04SHong Zhang     case 3:
124251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12437a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
124426fbe8dcSKarl Rupp 
12450cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12469566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
124751d1eed7SAmlan Barua #else
1248c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
124926fbe8dcSKarl Rupp 
12500cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12519566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
125251d1eed7SAmlan Barua #endif
1253b2b77a04SHong Zhang       break;
1254b2b77a04SHong Zhang     default:
1255b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12567a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
125726fbe8dcSKarl Rupp 
12580cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12599566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1260b3a17365SAmlan Barua #else
1261b3a17365SAmlan Barua       temp = pdim[ndim - 1];
126226fbe8dcSKarl Rupp 
1263b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
126426fbe8dcSKarl Rupp 
1265c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
126626fbe8dcSKarl Rupp 
12670cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12680cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
126926fbe8dcSKarl Rupp 
1270b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
127126fbe8dcSKarl Rupp 
12729566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1273b3a17365SAmlan Barua #endif
1274b2b77a04SHong Zhang       break;
1275b2b77a04SHong Zhang     }
12760cf2e031SBarry Smith #endif
1277b2b77a04SHong Zhang   }
12782277172eSMark Adams   free(pdim);
12799566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1280*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1281b2b77a04SHong Zhang   fft->data = (void *)fftw;
1282b2b77a04SHong Zhang 
12830c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1284e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
128526fbe8dcSKarl Rupp 
12869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
12878c1d5ab3SHong Zhang   if (size == 1) {
1288a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1289a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1290a1b6d50cSHong Zhang #else
12918c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1292a1b6d50cSHong Zhang #endif
12938c1d5ab3SHong Zhang   }
129426fbe8dcSKarl Rupp 
1295b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1296c92418ccSAmlan Barua 
1297f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1298f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1299b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1300b2b77a04SHong Zhang 
1301b2b77a04SHong Zhang   if (size == 1) {
1302b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1303b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13040cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1305b2b77a04SHong Zhang   } else {
1306b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1307b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13080cf2e031SBarry Smith #endif
1309b2b77a04SHong Zhang   }
1310b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1311b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13124a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
131326fbe8dcSKarl Rupp 
13149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1317b2b77a04SHong Zhang 
1318b2b77a04SHong Zhang   /* get runtime options */
1319d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13209566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13215f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1322d0609cedSBarry Smith   PetscOptionsEnd();
1323b2b77a04SHong Zhang   PetscFunctionReturn(0);
1324b2b77a04SHong Zhang }
1325