xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 523895ee4f068e6401cdb7a71a1bc8643023609c)
1b2b77a04SHong Zhang /*
2b2b77a04SHong Zhang     Provides an interface to the FFTW package.
3c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
4b2b77a04SHong Zhang */
5b2b77a04SHong Zhang 
6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7b2b77a04SHong Zhang EXTERN_C_BEGIN
80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9c6db04a5SJed Brown   #include <fftw3-mpi.h>
100cf2e031SBarry Smith #else
110cf2e031SBarry Smith   #include <fftw3.h>
120cf2e031SBarry Smith #endif
13b2b77a04SHong Zhang EXTERN_C_END
14b2b77a04SHong Zhang 
15b2b77a04SHong Zhang typedef struct {
16b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
17a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
18a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
19a1b6d50cSHong Zhang #else
208c1d5ab3SHong Zhang   fftw_iodim *iodims;
21a1b6d50cSHong Zhang #endif
22e5d7f247SAmlan Barua   PetscInt     partial_dim;
23b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
24b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25e8bdd179SBarry Smith   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
26b2b77a04SHong Zhang } Mat_FFTW;
27b2b77a04SHong Zhang 
280cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
29b2b77a04SHong Zhang 
30*523895eeSPierre Jolivet static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
31d71ae5a4SJacob Faibussowitsch {
32b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
33b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
34f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
35f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
36e8bdd179SBarry Smith   Vec                xx;
371acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
38a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
39a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
40a1b6d50cSHong Zhang   #else
418c1d5ab3SHong Zhang   fftw_iodim *iodims;
42a1b6d50cSHong Zhang   #endif
431acd80e4SHong Zhang   PetscInt i;
441acd80e4SHong Zhang #endif
451acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
46b2b77a04SHong Zhang 
47b2b77a04SHong Zhang   PetscFunctionBegin;
48e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
49e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
50e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
51e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
52e8bdd179SBarry Smith   } else {
539566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
54e8bdd179SBarry Smith   }
559566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
566aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
57b2b77a04SHong Zhang     switch (ndim) {
58b2b77a04SHong Zhang     case 1:
5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
60b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6158a912c5SAmlan Barua #else
6258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
6358a912c5SAmlan Barua #endif
64b2b77a04SHong Zhang       break;
65b2b77a04SHong Zhang     case 2:
6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
67b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6858a912c5SAmlan Barua #else
6958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7058a912c5SAmlan Barua #endif
71b2b77a04SHong Zhang       break;
72b2b77a04SHong Zhang     case 3:
7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
74b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7558a912c5SAmlan Barua #else
7658a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7758a912c5SAmlan Barua #endif
78b2b77a04SHong Zhang       break;
79b2b77a04SHong Zhang     default:
8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
81a1b6d50cSHong Zhang       iodims = fftw->iodims;
82a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
838c1d5ab3SHong Zhang       if (ndim) {
84a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
858c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
868c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
87a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
888c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
898c1d5ab3SHong Zhang         }
908c1d5ab3SHong Zhang       }
91a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
92a1b6d50cSHong Zhang   #else
93a1b6d50cSHong Zhang       if (ndim) {
94a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
95a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
96a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
97a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
98a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
99a1b6d50cSHong Zhang         }
100a1b6d50cSHong Zhang       }
101a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
102a1b6d50cSHong Zhang   #endif
103a1b6d50cSHong Zhang 
10458a912c5SAmlan Barua #else
105a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
10658a912c5SAmlan Barua #endif
107b2b77a04SHong Zhang       break;
108b2b77a04SHong Zhang     }
109e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
110e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
111e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
112e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
113e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
114e8bdd179SBarry Smith     } else {
115f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
116b2b77a04SHong Zhang       fftw->foutarray = y_array;
117e8bdd179SBarry Smith     }
118e8bdd179SBarry Smith   }
119e8bdd179SBarry Smith 
120b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1217e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
122b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1237e4bc134SDominic Meiser #else
1247e4bc134SDominic Meiser     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1257e4bc134SDominic Meiser #endif
126b2b77a04SHong Zhang   } else {
127b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
128b2b77a04SHong Zhang   }
1299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132b2b77a04SHong Zhang }
133b2b77a04SHong Zhang 
134*523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
135d71ae5a4SJacob Faibussowitsch {
136b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
137b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
138f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
139f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
140b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
141e8bdd179SBarry Smith   Vec                xx;
1421acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
143a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
144a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
145a1b6d50cSHong Zhang   #else
146a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
147a1b6d50cSHong Zhang   #endif
1481acd80e4SHong Zhang #endif
149b2b77a04SHong Zhang 
150b2b77a04SHong Zhang   PetscFunctionBegin;
151e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
152e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
153e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
154e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
155e8bdd179SBarry Smith   } else {
1569566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
157e8bdd179SBarry Smith   }
1589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1596aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
160b2b77a04SHong Zhang     switch (ndim) {
161b2b77a04SHong Zhang     case 1:
16258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
163b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16458a912c5SAmlan Barua #else
165e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
16658a912c5SAmlan Barua #endif
167b2b77a04SHong Zhang       break;
168b2b77a04SHong Zhang     case 2:
16958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
170b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17158a912c5SAmlan Barua #else
172e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17358a912c5SAmlan Barua #endif
174b2b77a04SHong Zhang       break;
175b2b77a04SHong Zhang     case 3:
17658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
177b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17858a912c5SAmlan Barua #else
179e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18058a912c5SAmlan Barua #endif
181b2b77a04SHong Zhang       break;
182b2b77a04SHong Zhang     default:
18358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
184a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
185a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
186a1b6d50cSHong Zhang   #else
1878c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
188a1b6d50cSHong Zhang   #endif
18958a912c5SAmlan Barua #else
190a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19158a912c5SAmlan Barua #endif
192b2b77a04SHong Zhang       break;
193b2b77a04SHong Zhang     }
194e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
195e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
196e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
197e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
198e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
199e8bdd179SBarry Smith     } else {
200f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
201b2b77a04SHong Zhang       fftw->boutarray = y_array;
202e8bdd179SBarry Smith     }
203e8bdd179SBarry Smith   }
204b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2057e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
206b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2077e4bc134SDominic Meiser #else
2087e4bc134SDominic Meiser     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2097e4bc134SDominic Meiser #endif
210b2b77a04SHong Zhang   } else {
2112f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
212b2b77a04SHong Zhang   }
2139566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
216b2b77a04SHong Zhang }
217b2b77a04SHong Zhang 
2180cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
219*523895eeSPierre Jolivet static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
220d71ae5a4SJacob Faibussowitsch {
221b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
222b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
223f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
224f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
225c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
226ce94432eSBarry Smith   MPI_Comm           comm;
227e8bdd179SBarry Smith   Vec                xx;
228b2b77a04SHong Zhang 
229b2b77a04SHong Zhang   PetscFunctionBegin;
2309566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
231e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
232e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
233e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
234e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
235e8bdd179SBarry Smith   } else {
2369566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
237e8bdd179SBarry Smith   }
2389566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2396aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
240b2b77a04SHong Zhang     switch (ndim) {
241b2b77a04SHong Zhang     case 1:
2423c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
243b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
244ae0a50aaSHong Zhang   #else
2454f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2463c3a9c75SAmlan Barua   #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     case 2:
24997504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2513c3a9c75SAmlan Barua   #else
2523c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2533c3a9c75SAmlan Barua   #endif
254b2b77a04SHong Zhang       break;
255b2b77a04SHong Zhang     case 3:
2563c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
257b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2583c3a9c75SAmlan Barua   #else
25951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2603c3a9c75SAmlan Barua   #endif
261b2b77a04SHong Zhang       break;
262b2b77a04SHong Zhang     default:
2633c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
264c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2653c3a9c75SAmlan Barua   #else
2663c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2673c3a9c75SAmlan Barua   #endif
268b2b77a04SHong Zhang       break;
269b2b77a04SHong Zhang     }
270e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
271e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
272e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
273e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
274e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
275e8bdd179SBarry Smith     } else {
276f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
277b2b77a04SHong Zhang       fftw->foutarray = y_array;
278e8bdd179SBarry Smith     }
279e8bdd179SBarry Smith   }
280b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
281b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
282b2b77a04SHong Zhang   } else {
283b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
284b2b77a04SHong Zhang   }
2859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
288b2b77a04SHong Zhang }
289b2b77a04SHong Zhang 
290*523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
291d71ae5a4SJacob Faibussowitsch {
292b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
293b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
294f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
295f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
296c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
297ce94432eSBarry Smith   MPI_Comm           comm;
298e8bdd179SBarry Smith   Vec                xx;
299b2b77a04SHong Zhang 
300b2b77a04SHong Zhang   PetscFunctionBegin;
3019566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
302e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
303e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
304e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
305e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
306e8bdd179SBarry Smith   } else {
3079566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
308e8bdd179SBarry Smith   }
3099566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
3106aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
311b2b77a04SHong Zhang     switch (ndim) {
312b2b77a04SHong Zhang     case 1:
3133c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
314b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
315ae0a50aaSHong Zhang   #else
3164f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3173c3a9c75SAmlan Barua   #endif
318b2b77a04SHong Zhang       break;
319b2b77a04SHong Zhang     case 2:
32097504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3223c3a9c75SAmlan Barua   #else
3233c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3243c3a9c75SAmlan Barua   #endif
325b2b77a04SHong Zhang       break;
326b2b77a04SHong Zhang     case 3:
3273c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
328b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3293c3a9c75SAmlan Barua   #else
3303c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3313c3a9c75SAmlan Barua   #endif
332b2b77a04SHong Zhang       break;
333b2b77a04SHong Zhang     default:
3343c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
335c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3363c3a9c75SAmlan Barua   #else
3373c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3383c3a9c75SAmlan Barua   #endif
339b2b77a04SHong Zhang       break;
340b2b77a04SHong Zhang     }
341e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
342e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
343e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
344e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
345e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
346e8bdd179SBarry Smith     } else {
347f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
348b2b77a04SHong Zhang       fftw->boutarray = y_array;
349e8bdd179SBarry Smith     }
350e8bdd179SBarry Smith   }
351b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
352b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
353b2b77a04SHong Zhang   } else {
3542f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
355b2b77a04SHong Zhang   }
3569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
359b2b77a04SHong Zhang }
3600cf2e031SBarry Smith #endif
361b2b77a04SHong Zhang 
362*523895eeSPierre Jolivet static PetscErrorCode MatDestroy_FFTW(Mat A)
363d71ae5a4SJacob Faibussowitsch {
364b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
365b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
366b2b77a04SHong Zhang 
367b2b77a04SHong Zhang   PetscFunctionBegin;
368b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
369b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
370ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3719566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3732e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3742e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3776ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3780cf2e031SBarry Smith #endif
3793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
380b2b77a04SHong Zhang }
381b2b77a04SHong Zhang 
3820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
383c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
384*523895eeSPierre Jolivet static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385d71ae5a4SJacob Faibussowitsch {
386b2b77a04SHong Zhang   PetscScalar *array;
387b2b77a04SHong Zhang 
388b2b77a04SHong Zhang   PetscFunctionBegin;
3899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3902f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3929566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
3933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
394b2b77a04SHong Zhang }
3950cf2e031SBarry Smith #endif
396b2b77a04SHong Zhang 
3970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
398d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399d71ae5a4SJacob Faibussowitsch {
4005b113e21Ss-sajid-ali   Mat A;
4015b113e21Ss-sajid-ali 
4025b113e21Ss-sajid-ali   PetscFunctionBegin;
4039566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
4049566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
4053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4065b113e21Ss-sajid-ali }
4075b113e21Ss-sajid-ali 
408d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409d71ae5a4SJacob Faibussowitsch {
4105b113e21Ss-sajid-ali   Mat A;
4115b113e21Ss-sajid-ali 
4125b113e21Ss-sajid-ali   PetscFunctionBegin;
4139566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4149566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4165b113e21Ss-sajid-ali }
4175b113e21Ss-sajid-ali 
418d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419d71ae5a4SJacob Faibussowitsch {
4205b113e21Ss-sajid-ali   Mat A;
4215b113e21Ss-sajid-ali 
4225b113e21Ss-sajid-ali   PetscFunctionBegin;
4239566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4249566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4265b113e21Ss-sajid-ali }
4270cf2e031SBarry Smith #endif
4285b113e21Ss-sajid-ali 
4294be45526SHong Zhang /*@
4302a7a6963SBarry Smith   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
43111a5261eSBarry Smith   parallel layout determined by `MATFFTW`
4324f7415efSAmlan Barua 
433c3339decSBarry Smith   Collective
4344f7415efSAmlan Barua 
4354f7415efSAmlan Barua   Input Parameter:
4363564f093SHong Zhang . A - the matrix
4374f7415efSAmlan Barua 
438d8d19677SJose E. Roman   Output Parameters:
439cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW
4406b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW
441cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW
4424f7415efSAmlan Barua 
4432ef1f0ffSBarry Smith   Options Database Key:
4442ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags
4452ef1f0ffSBarry Smith 
4464f7415efSAmlan Barua   Level: advanced
4473564f093SHong Zhang 
44811a5261eSBarry Smith   Notes:
44911a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
45097504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
45197504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
45211a5261eSBarry Smith 
45311a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
45497504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45597504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
456e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
457e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
458e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
45911a5261eSBarry Smith 
460e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
461e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
462e0ec536eSAmlan Barua   each processor and returns that.
4634f7415efSAmlan Barua 
464fe59aa6dSJacob Faibussowitsch   Developer Notes:
46511a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
46611a5261eSBarry Smith 
4671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4684be45526SHong Zhang @*/
469d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470d71ae5a4SJacob Faibussowitsch {
4714be45526SHong Zhang   PetscFunctionBegin;
472cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4744be45526SHong Zhang }
4754be45526SHong Zhang 
476d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477d71ae5a4SJacob Faibussowitsch {
4784f7415efSAmlan Barua   PetscMPIInt size, rank;
479ce94432eSBarry Smith   MPI_Comm    comm;
4804f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4814f7415efSAmlan Barua 
4824f7415efSAmlan Barua   PetscFunctionBegin;
4834f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4844f7415efSAmlan Barua   PetscValidType(A, 1);
4859566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4864f7415efSAmlan Barua 
4879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4894f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4904f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4919566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4929566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4939566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4944f7415efSAmlan Barua #else
4959566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4969566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4979566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4984f7415efSAmlan Barua #endif
4990cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50097504071SAmlan Barua   } else { /* parallel cases */
5010cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
5020cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
5034f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
5049496c9aeSAmlan Barua     ptrdiff_t     local_n1;
5059496c9aeSAmlan Barua     fftw_complex *data_fout;
5069496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
5079496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
5089496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
5099496c9aeSAmlan Barua   #else
5104f7415efSAmlan Barua     double   *data_finr, *data_boutr;
5116ccf0b3eSHong Zhang     PetscInt  n1, N1;
5129496c9aeSAmlan Barua     ptrdiff_t temp;
5139496c9aeSAmlan Barua   #endif
5149496c9aeSAmlan Barua 
5154f7415efSAmlan Barua     switch (ndim) {
5164f7415efSAmlan Barua     case 1:
51757625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5184f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
51957625b84SAmlan Barua   #else
52057625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52157625b84SAmlan Barua       if (fin) {
52257625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
5249566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5255b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
52657625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
52757625b84SAmlan Barua       }
52857625b84SAmlan Barua       if (fout) {
52957625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5309566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
5319566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5325b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
53357625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
53457625b84SAmlan Barua       }
53557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
53657625b84SAmlan Barua       if (bout) {
53757625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5389566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5399566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5405b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54157625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
54257625b84SAmlan Barua       }
54357625b84SAmlan Barua       break;
54457625b84SAmlan Barua   #endif
5454f7415efSAmlan Barua     case 2:
54697504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5474f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5489371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5499371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5504f7415efSAmlan Barua       if (fin) {
5514f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5529566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5539566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5545b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5554f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5564f7415efSAmlan Barua       }
55757625b84SAmlan Barua       if (fout) {
55857625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5599566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5609566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5615b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
56257625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56357625b84SAmlan Barua       }
5645b113e21Ss-sajid-ali       if (bout) {
5655b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5669566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5679566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5685b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5695b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5705b113e21Ss-sajid-ali       }
5714f7415efSAmlan Barua   #else
5724f7415efSAmlan Barua       /* Get local size */
5734f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5744f7415efSAmlan Barua       if (fin) {
5754f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5769566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5779566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5785b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5794f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5804f7415efSAmlan Barua       }
5814f7415efSAmlan Barua       if (fout) {
5824f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5839566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5849566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5855b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5864f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5874f7415efSAmlan Barua       }
5884f7415efSAmlan Barua       if (bout) {
5894f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5909566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5919566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5925b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5934f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua       }
5954f7415efSAmlan Barua   #endif
5964f7415efSAmlan Barua       break;
5974f7415efSAmlan Barua     case 3:
5984f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5994f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6009371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
6019371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
6024f7415efSAmlan Barua       if (fin) {
6034f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6049566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
6059566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6065b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6074f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6084f7415efSAmlan Barua       }
6095b113e21Ss-sajid-ali       if (fout) {
6105b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6119566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
6129566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6135b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6145b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6155b113e21Ss-sajid-ali       }
6164f7415efSAmlan Barua       if (bout) {
6174f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6189566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6199566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6205b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6214f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6224f7415efSAmlan Barua       }
6234f7415efSAmlan Barua   #else
6240c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6250c9b18e4SAmlan Barua       if (fin) {
6260c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6279566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6289566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6295b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6300c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6320c9b18e4SAmlan Barua       if (fout) {
6330c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6349566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6359566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6365b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6370c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6380c9b18e4SAmlan Barua       }
6390c9b18e4SAmlan Barua       if (bout) {
6400c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6419566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6429566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6435b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6440c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6450c9b18e4SAmlan Barua       }
6464f7415efSAmlan Barua   #endif
6474f7415efSAmlan Barua       break;
6484f7415efSAmlan Barua     default:
6494f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6504f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6514f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6524f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653f4f49eeaSPierre Jolivet       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6544f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6554f7415efSAmlan Barua 
6564f7415efSAmlan Barua       if (fin) {
6574f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6589566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6599566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6605b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6614f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6624f7415efSAmlan Barua       }
6635b113e21Ss-sajid-ali       if (fout) {
6645b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6659566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6669566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6675b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6685b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6695b113e21Ss-sajid-ali       }
6704f7415efSAmlan Barua       if (bout) {
6714f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6729566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6739566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6745b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6759496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6764f7415efSAmlan Barua       }
6774f7415efSAmlan Barua   #else
6780c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6790c9b18e4SAmlan Barua       if (fin) {
6800c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6819566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6829566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6835b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6840c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6850c9b18e4SAmlan Barua       }
6860c9b18e4SAmlan Barua       if (fout) {
6870c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6889566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6899566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6905b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6910c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6920c9b18e4SAmlan Barua       }
6930c9b18e4SAmlan Barua       if (bout) {
6940c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6969566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6975b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6980c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6990c9b18e4SAmlan Barua       }
7004f7415efSAmlan Barua   #endif
7014f7415efSAmlan Barua       break;
7024f7415efSAmlan Barua     }
703f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
704f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
705da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
706f9d91177SJunchao Zhang     */
707f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
708f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
709f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
7100cf2e031SBarry Smith #endif
7114f7415efSAmlan Barua   }
7123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7134f7415efSAmlan Barua }
7144f7415efSAmlan Barua 
7153564f093SHong Zhang /*@
71611a5261eSBarry Smith   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
71754efbe56SHong Zhang 
718c3339decSBarry Smith   Collective
7193564f093SHong Zhang 
7203564f093SHong Zhang   Input Parameters:
7213564f093SHong Zhang + A - FFTW matrix
7223564f093SHong Zhang - x - the PETSc vector
7233564f093SHong Zhang 
7242fe279fdSBarry Smith   Output Parameter:
7253564f093SHong Zhang . y - the FFTW vector
7263564f093SHong Zhang 
727b2b77a04SHong Zhang   Level: intermediate
7283564f093SHong Zhang 
72911a5261eSBarry Smith   Note:
73011a5261eSBarry Smith   For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73197504071SAmlan Barua   one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
73297504071SAmlan Barua   zeros. This routine does that job by scattering operation.
73397504071SAmlan Barua 
7341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7353564f093SHong Zhang @*/
736d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737d71ae5a4SJacob Faibussowitsch {
7383564f093SHong Zhang   PetscFunctionBegin;
739cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7413564f093SHong Zhang }
7423c3a9c75SAmlan Barua 
74366976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744d71ae5a4SJacob Faibussowitsch {
745ce94432eSBarry Smith   MPI_Comm    comm;
7463c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7479496c9aeSAmlan Barua   PetscInt    low;
7487a21806cSHong Zhang   PetscMPIInt rank, size;
7497a21806cSHong Zhang   PetscInt    vsize, vsize1;
7503c3a9c75SAmlan Barua   VecScatter  vecscat;
7510cf2e031SBarry Smith   IS          list1;
7523c3a9c75SAmlan Barua 
7533564f093SHong Zhang   PetscFunctionBegin;
7549566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7559566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7569566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7579566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7583c3a9c75SAmlan Barua 
7593564f093SHong Zhang   if (size == 1) {
7609566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7619566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7629566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7639566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7649566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7659566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7669566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7693564f093SHong Zhang   } else {
7700cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7710cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7720cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7730cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7740cf2e031SBarry Smith     IS        list2;
7750cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7760cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7770cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7780cf2e031SBarry Smith     PetscInt  NM;
7790cf2e031SBarry Smith     ptrdiff_t temp;
7800cf2e031SBarry Smith   #endif
7810cf2e031SBarry Smith 
7823c3a9c75SAmlan Barua     switch (ndim) {
7833c3a9c75SAmlan Barua     case 1:
78464657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7857a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
78626fbe8dcSKarl Rupp 
7879566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7889566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7899566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7909566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7919566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7929566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7939566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7949566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
79564657d84SAmlan Barua   #else
796e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
79764657d84SAmlan Barua   #endif
7983c3a9c75SAmlan Barua       break;
7993c3a9c75SAmlan Barua     case 2:
800bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8017a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
80226fbe8dcSKarl Rupp 
8039566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
8049566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
8059566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8069566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8079566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8089566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8099566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8109566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
811bd59e6c6SAmlan Barua   #else
812c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
81326fbe8dcSKarl Rupp 
81457508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
81557508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
8163c3a9c75SAmlan Barua 
8173564f093SHong Zhang       if (dim[1] % 2 == 0) {
8183c3a9c75SAmlan Barua         NM = dim[1] + 2;
8193564f093SHong Zhang       } else {
8203c3a9c75SAmlan Barua         NM = dim[1] + 1;
8213564f093SHong Zhang       }
8223c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
8233c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
8243c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
8253c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
82626fbe8dcSKarl Rupp 
8275b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
8283c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
8293c3a9c75SAmlan Barua         }
8303c3a9c75SAmlan Barua       }
8313c3a9c75SAmlan Barua 
8329566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
8339566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
8343c3a9c75SAmlan Barua 
8359566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8369566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8379566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8389566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8399566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8409566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8419566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8429566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
843bd59e6c6SAmlan Barua   #endif
8449496c9aeSAmlan Barua       break;
8453c3a9c75SAmlan Barua 
8463c3a9c75SAmlan Barua     case 3:
847bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8487a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
84926fbe8dcSKarl Rupp 
8509566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8519566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8529566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8539566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8549566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8559566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8569566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8579566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
858bd59e6c6SAmlan Barua   #else
859c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8617a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
86226fbe8dcSKarl Rupp 
86357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
86457508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
86551d1eed7SAmlan Barua 
866db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
867db4deed7SKarl Rupp       else NM = dim[2] + 1;
86851d1eed7SAmlan Barua 
86951d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
87051d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
87151d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
87251d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
87351d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
87426fbe8dcSKarl Rupp 
87551d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
87651d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
87751d1eed7SAmlan Barua           }
87851d1eed7SAmlan Barua         }
87951d1eed7SAmlan Barua       }
88051d1eed7SAmlan Barua 
8819566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8829566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8839566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8849566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8859566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8869566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8879566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8889566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8899566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8909566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
891bd59e6c6SAmlan Barua   #endif
8929496c9aeSAmlan Barua       break;
8933c3a9c75SAmlan Barua 
8943c3a9c75SAmlan Barua     default:
895bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8967a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
89726fbe8dcSKarl Rupp 
8989566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
8999566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
9009566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9019566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9029566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9039566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
906bd59e6c6SAmlan Barua   #else
907c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
91026fbe8dcSKarl Rupp 
911e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
91226fbe8dcSKarl Rupp 
9137a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
91426fbe8dcSKarl Rupp 
915e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
916e5d7f247SAmlan Barua 
917e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
918e5d7f247SAmlan Barua 
91957508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
92057508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
921e5d7f247SAmlan Barua 
922db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
923db4deed7SKarl Rupp       else NM = 1;
924e5d7f247SAmlan Barua 
9256971673cSAmlan Barua       j = low;
92657508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
9276971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
9286971673cSAmlan Barua         indx2[i] = j;
92926fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
9306971673cSAmlan Barua         j++;
9316971673cSAmlan Barua       }
9329566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9339566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9349566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9359566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9369566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9379566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9389566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9399566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9409566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9419566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
942bd59e6c6SAmlan Barua   #endif
9439496c9aeSAmlan Barua       break;
9443c3a9c75SAmlan Barua     }
9450cf2e031SBarry Smith #endif
946e81bb599SAmlan Barua   }
9473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9483c3a9c75SAmlan Barua }
9493c3a9c75SAmlan Barua 
9503564f093SHong Zhang /*@
95111a5261eSBarry Smith   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9523564f093SHong Zhang 
953c3339decSBarry Smith   Collective
9543564f093SHong Zhang 
9553564f093SHong Zhang   Input Parameters:
95611a5261eSBarry Smith + A - `MATFFTW` matrix
9573564f093SHong Zhang - x - FFTW vector
9583564f093SHong Zhang 
9592fe279fdSBarry Smith   Output Parameter:
9603564f093SHong Zhang . y - PETSc vector
9613564f093SHong Zhang 
9623564f093SHong Zhang   Level: intermediate
9633564f093SHong Zhang 
96411a5261eSBarry Smith   Note:
96511a5261eSBarry Smith   While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
96611a5261eSBarry Smith   `VecScatterFFTWToPetsc()` removes those extra zeros.
9673564f093SHong Zhang 
9681cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9693564f093SHong Zhang @*/
970d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971d71ae5a4SJacob Faibussowitsch {
9723c3a9c75SAmlan Barua   PetscFunctionBegin;
973cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9753c3a9c75SAmlan Barua }
97654efbe56SHong Zhang 
97766976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978d71ae5a4SJacob Faibussowitsch {
979ce94432eSBarry Smith   MPI_Comm    comm;
9805b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9819496c9aeSAmlan Barua   PetscInt    low;
9827a21806cSHong Zhang   PetscMPIInt rank, size;
9835b3e41ffSAmlan Barua   VecScatter  vecscat;
9840cf2e031SBarry Smith   IS          list1;
9859496c9aeSAmlan Barua 
9863564f093SHong Zhang   PetscFunctionBegin;
9879566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9909566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9915b3e41ffSAmlan Barua 
992e81bb599SAmlan Barua   if (size == 1) {
9939566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
9949566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
9959566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9969566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9979566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9989566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
999e81bb599SAmlan Barua 
10000cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10013564f093SHong Zhang   } else {
10020cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
10030cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
10040cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
10050cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
10060cf2e031SBarry Smith     IS        list2;
10070cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
10080cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
10090cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
10100cf2e031SBarry Smith     PetscInt  NM;
10110cf2e031SBarry Smith     ptrdiff_t temp;
10120cf2e031SBarry Smith   #endif
1013e81bb599SAmlan Barua     switch (ndim) {
1014e81bb599SAmlan Barua     case 1:
101564657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10167a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
101726fbe8dcSKarl Rupp 
10189566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
10199566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
10209566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10219566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10229566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10239566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10249566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10259566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
102664657d84SAmlan Barua   #else
10276a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
102864657d84SAmlan Barua   #endif
10295b3e41ffSAmlan Barua       break;
10305b3e41ffSAmlan Barua     case 2:
1031bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10327a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
103326fbe8dcSKarl Rupp 
10349566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
10359566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10369566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10379566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10389566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10399566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10409566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10419566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1042bd59e6c6SAmlan Barua   #else
10437a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
104426fbe8dcSKarl Rupp 
104557508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
104657508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
10475b3e41ffSAmlan Barua 
1048db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049db4deed7SKarl Rupp       else NM = dim[1] + 1;
10505b3e41ffSAmlan Barua 
10515b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10525b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10535b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10545b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
105526fbe8dcSKarl Rupp 
10565b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10575b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10585b3e41ffSAmlan Barua         }
10595b3e41ffSAmlan Barua       }
10605b3e41ffSAmlan Barua 
10619566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10629566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10635b3e41ffSAmlan Barua 
10649566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10659566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10669566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10679566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10689566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10709566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10719566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1072bd59e6c6SAmlan Barua   #endif
10739496c9aeSAmlan Barua       break;
10745b3e41ffSAmlan Barua     case 3:
1075bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10767a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
107726fbe8dcSKarl Rupp 
10789566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10799566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10809566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10819566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10829566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10839566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10849566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10859566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1086bd59e6c6SAmlan Barua   #else
10877a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
108826fbe8dcSKarl Rupp 
108957508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
109057508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
109151d1eed7SAmlan Barua 
1092db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093db4deed7SKarl Rupp       else NM = dim[2] + 1;
109451d1eed7SAmlan Barua 
109551d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
109651d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
109751d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
109851d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
109951d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
110026fbe8dcSKarl Rupp 
110151d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
110251d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
110351d1eed7SAmlan Barua           }
110451d1eed7SAmlan Barua         }
110551d1eed7SAmlan Barua       }
110651d1eed7SAmlan Barua 
11079566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
11089566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
110951d1eed7SAmlan Barua 
11109566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11119566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11129566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11139566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11149566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11159566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11169566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11179566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1118bd59e6c6SAmlan Barua   #endif
11199496c9aeSAmlan Barua       break;
11205b3e41ffSAmlan Barua     default:
1121bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
11227a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
112326fbe8dcSKarl Rupp 
11249566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
11259566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
11269566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11279566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11289566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11299566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11309566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11319566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1132bd59e6c6SAmlan Barua   #else
1133ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
113426fbe8dcSKarl Rupp 
1135ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
113626fbe8dcSKarl Rupp 
1137c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
113826fbe8dcSKarl Rupp 
1139ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1140ba6e06d0SAmlan Barua 
1141ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1142ba6e06d0SAmlan Barua 
114357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
114457508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1145ba6e06d0SAmlan Barua 
1146db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1147db4deed7SKarl Rupp       else NM = 1;
1148ba6e06d0SAmlan Barua 
1149ba6e06d0SAmlan Barua       j = low;
115057508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1151ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1152ba6e06d0SAmlan Barua         indx2[i] = j;
11533564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1154ba6e06d0SAmlan Barua         j++;
1155ba6e06d0SAmlan Barua       }
11569566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11579566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1158ba6e06d0SAmlan Barua 
11599566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11609566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11619566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11629566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11639566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11649566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11659566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11669566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1167bd59e6c6SAmlan Barua   #endif
11689496c9aeSAmlan Barua       break;
11695b3e41ffSAmlan Barua     }
11700cf2e031SBarry Smith #endif
1171e81bb599SAmlan Barua   }
11723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11735b3e41ffSAmlan Barua }
11745b3e41ffSAmlan Barua 
1175350f1385SJose E. Roman /*MC
1176350f1385SJose E. Roman   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.
1177350f1385SJose E. Roman 
1178350f1385SJose E. Roman   Level: intermediate
1179350f1385SJose E. Roman 
11801cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1181350f1385SJose E. Roman M*/
1182d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1183d71ae5a4SJacob Faibussowitsch {
1184ce94432eSBarry Smith   MPI_Comm    comm;
1185b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1186b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11870cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11885274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11895274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1190b2b77a04SHong Zhang   PetscBool   flg;
11914f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
119211902ff2SHong Zhang   PetscMPIInt size, rank;
11939496c9aeSAmlan Barua   ptrdiff_t  *pdim;
11949496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11950cf2e031SBarry Smith   PetscInt tot_dim;
11969496c9aeSAmlan Barua #endif
11979496c9aeSAmlan Barua 
1198b2b77a04SHong Zhang   PetscFunctionBegin;
11999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
12009566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
12019566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1202c92418ccSAmlan Barua 
12030cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12041878d478SAmlan Barua   fftw_mpi_init();
12050cf2e031SBarry Smith #endif
120611902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
120711902ff2SHong Zhang   pdim[0] = dim[0];
12088ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12098ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
12108ca4c034SAmlan Barua #endif
12113564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
12126882372aSHong Zhang     partial_dim *= dim[ctr];
121311902ff2SHong Zhang     pdim[ctr] = dim[ctr];
12148ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1215db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1216db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
12178ca4c034SAmlan Barua #endif
12186882372aSHong Zhang   }
12193c3a9c75SAmlan Barua 
1220b2b77a04SHong Zhang   if (size == 1) {
1221e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12229566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12230cf2e031SBarry Smith     fft->n = fft->N;
1224e81bb599SAmlan Barua #else
12259566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12260cf2e031SBarry Smith     fft->n = tot_dim;
12270cf2e031SBarry Smith #endif
12280cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12290cf2e031SBarry Smith   } else {
12300cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12310cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
12320cf2e031SBarry Smith     ptrdiff_t temp;
12330cf2e031SBarry Smith     PetscInt  N1;
1234e81bb599SAmlan Barua   #endif
1235e81bb599SAmlan Barua 
1236b2b77a04SHong Zhang     switch (ndim) {
1237b2b77a04SHong Zhang     case 1:
12383c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12393c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1240e5d7f247SAmlan Barua   #else
12417a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12420cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12439566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1244e5d7f247SAmlan Barua   #endif
1245b2b77a04SHong Zhang       break;
1246b2b77a04SHong Zhang     case 2:
12475b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12487a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12490cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12509566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12515b3e41ffSAmlan Barua   #else
1252c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
125326fbe8dcSKarl Rupp 
12540cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12559566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12565b3e41ffSAmlan Barua   #endif
1257b2b77a04SHong Zhang       break;
1258b2b77a04SHong Zhang     case 3:
125951d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12607a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
126126fbe8dcSKarl Rupp 
12620cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12639566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
126451d1eed7SAmlan Barua   #else
1265c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
126626fbe8dcSKarl Rupp 
12670cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12689566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
126951d1eed7SAmlan Barua   #endif
1270b2b77a04SHong Zhang       break;
1271b2b77a04SHong Zhang     default:
1272b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12737a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
127426fbe8dcSKarl Rupp 
12750cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12769566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1277b3a17365SAmlan Barua   #else
1278b3a17365SAmlan Barua       temp = pdim[ndim - 1];
127926fbe8dcSKarl Rupp 
1280b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
128126fbe8dcSKarl Rupp 
1282c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
128326fbe8dcSKarl Rupp 
12840cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12850cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
128626fbe8dcSKarl Rupp 
1287b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
128826fbe8dcSKarl Rupp 
12899566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1290b3a17365SAmlan Barua   #endif
1291b2b77a04SHong Zhang       break;
1292b2b77a04SHong Zhang     }
12930cf2e031SBarry Smith #endif
1294b2b77a04SHong Zhang   }
12952277172eSMark Adams   free(pdim);
12969566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
12974dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1298b2b77a04SHong Zhang   fft->data = (void *)fftw;
1299b2b77a04SHong Zhang 
13000c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1301e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
130226fbe8dcSKarl Rupp 
1303f4f49eeaSPierre Jolivet   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
13048c1d5ab3SHong Zhang   if (size == 1) {
1305a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1306a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1307a1b6d50cSHong Zhang #else
13088c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1309a1b6d50cSHong Zhang #endif
13108c1d5ab3SHong Zhang   }
131126fbe8dcSKarl Rupp 
1312b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1313c92418ccSAmlan Barua 
1314f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1315f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1316b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1317b2b77a04SHong Zhang 
1318b2b77a04SHong Zhang   if (size == 1) {
1319b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1320b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1322b2b77a04SHong Zhang   } else {
1323b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1324b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13250cf2e031SBarry Smith #endif
1326b2b77a04SHong Zhang   }
1327b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1328b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13294a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
133026fbe8dcSKarl Rupp 
13319566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13329566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13339566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1334b2b77a04SHong Zhang 
1335b2b77a04SHong Zhang   /* get runtime options */
1336d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13379566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13385f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1339d0609cedSBarry Smith   PetscOptionsEnd();
13403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1341b2b77a04SHong Zhang }
1342