xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 1cc06b555e92f8ec64db10330b8bbd830e5bc876)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10c6db04a5SJed Brown   #include <fftw3-mpi.h>
110cf2e031SBarry Smith #else
120cf2e031SBarry Smith   #include <fftw3.h>
130cf2e031SBarry Smith #endif
14b2b77a04SHong Zhang EXTERN_C_END
15b2b77a04SHong Zhang 
16b2b77a04SHong Zhang typedef struct {
17b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26da81f932SPierre Jolivet   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be
27b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
28b2b77a04SHong Zhang } Mat_FFTW;
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
380cf2e031SBarry Smith #endif
39b2b77a04SHong Zhang 
40d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
41d71ae5a4SJacob Faibussowitsch {
42b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
43b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
44f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
45f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
461acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
47a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
48a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
49a1b6d50cSHong Zhang   #else
508c1d5ab3SHong Zhang   fftw_iodim *iodims;
51a1b6d50cSHong Zhang   #endif
521acd80e4SHong Zhang   PetscInt i;
531acd80e4SHong Zhang #endif
541acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
55b2b77a04SHong Zhang 
56b2b77a04SHong Zhang   PetscFunctionBegin;
579566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
596aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
60b2b77a04SHong Zhang     switch (ndim) {
61b2b77a04SHong Zhang     case 1:
6258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
63b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6458a912c5SAmlan Barua #else
6558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
6658a912c5SAmlan Barua #endif
67b2b77a04SHong Zhang       break;
68b2b77a04SHong Zhang     case 2:
6958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
70b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7158a912c5SAmlan Barua #else
7258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7358a912c5SAmlan Barua #endif
74b2b77a04SHong Zhang       break;
75b2b77a04SHong Zhang     case 3:
7658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
77b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7858a912c5SAmlan Barua #else
7958a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8058a912c5SAmlan Barua #endif
81b2b77a04SHong Zhang       break;
82b2b77a04SHong Zhang     default:
8358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
84a1b6d50cSHong Zhang       iodims = fftw->iodims;
85a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
868c1d5ab3SHong Zhang       if (ndim) {
87a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
888c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
898c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
90a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
918c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
928c1d5ab3SHong Zhang         }
938c1d5ab3SHong Zhang       }
94a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
95a1b6d50cSHong Zhang   #else
96a1b6d50cSHong Zhang       if (ndim) {
97a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
98a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
99a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
100a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
101a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
102a1b6d50cSHong Zhang         }
103a1b6d50cSHong Zhang       }
104a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
105a1b6d50cSHong Zhang   #endif
106a1b6d50cSHong Zhang 
10758a912c5SAmlan Barua #else
108a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
10958a912c5SAmlan Barua #endif
110b2b77a04SHong Zhang       break;
111b2b77a04SHong Zhang     }
112f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
113b2b77a04SHong Zhang     fftw->foutarray = y_array;
114b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
115b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
116b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
117b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
118b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1197e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
120b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1217e4bc134SDominic Meiser #else
1227e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1237e4bc134SDominic Meiser #endif
124b2b77a04SHong Zhang     } else {
125b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
126b2b77a04SHong Zhang     }
127b2b77a04SHong Zhang   }
1289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131b2b77a04SHong Zhang }
132b2b77a04SHong Zhang 
133d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
134d71ae5a4SJacob Faibussowitsch {
135b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
136b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
137f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
138f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
139b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
1401acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
141a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
142a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
143a1b6d50cSHong Zhang   #else
144a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
145a1b6d50cSHong Zhang   #endif
1461acd80e4SHong Zhang #endif
147b2b77a04SHong Zhang 
148b2b77a04SHong Zhang   PetscFunctionBegin;
1499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
1509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1516aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
152b2b77a04SHong Zhang     switch (ndim) {
153b2b77a04SHong Zhang     case 1:
15458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
155b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
15658a912c5SAmlan Barua #else
157e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
15858a912c5SAmlan Barua #endif
159b2b77a04SHong Zhang       break;
160b2b77a04SHong Zhang     case 2:
16158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
162b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16358a912c5SAmlan Barua #else
164e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
16558a912c5SAmlan Barua #endif
166b2b77a04SHong Zhang       break;
167b2b77a04SHong Zhang     case 3:
16858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
169b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17058a912c5SAmlan Barua #else
171e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17258a912c5SAmlan Barua #endif
173b2b77a04SHong Zhang       break;
174b2b77a04SHong Zhang     default:
17558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
176a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
177a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
178a1b6d50cSHong Zhang   #else
1798c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
180a1b6d50cSHong Zhang   #endif
18158a912c5SAmlan Barua #else
182a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18358a912c5SAmlan Barua #endif
184b2b77a04SHong Zhang       break;
185b2b77a04SHong Zhang     }
186f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
187b2b77a04SHong Zhang     fftw->boutarray = y_array;
1882f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
189b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
190b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
1917e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
192b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1937e4bc134SDominic Meiser #else
1947e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
1957e4bc134SDominic Meiser #endif
196b2b77a04SHong Zhang     } else {
1972f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
198b2b77a04SHong Zhang     }
199b2b77a04SHong Zhang   }
2009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
203b2b77a04SHong Zhang }
204b2b77a04SHong Zhang 
2050cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
206d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
207d71ae5a4SJacob Faibussowitsch {
208b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
209b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
210f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
211f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
212c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
213ce94432eSBarry Smith   MPI_Comm           comm;
214b2b77a04SHong Zhang 
215b2b77a04SHong Zhang   PetscFunctionBegin;
2169566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2179566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2196aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
220b2b77a04SHong Zhang     switch (ndim) {
221b2b77a04SHong Zhang     case 1:
2223c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
223b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
224ae0a50aaSHong Zhang   #else
2254f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2263c3a9c75SAmlan Barua   #endif
227b2b77a04SHong Zhang       break;
228b2b77a04SHong Zhang     case 2:
22997504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
230b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2313c3a9c75SAmlan Barua   #else
2323c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2333c3a9c75SAmlan Barua   #endif
234b2b77a04SHong Zhang       break;
235b2b77a04SHong Zhang     case 3:
2363c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
237b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2383c3a9c75SAmlan Barua   #else
23951d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2403c3a9c75SAmlan Barua   #endif
241b2b77a04SHong Zhang       break;
242b2b77a04SHong Zhang     default:
2433c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
244c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2453c3a9c75SAmlan Barua   #else
2463c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2473c3a9c75SAmlan Barua   #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     }
250f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
251b2b77a04SHong Zhang     fftw->foutarray = y_array;
252b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
253b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
254b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
255b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
256b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
257b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
258b2b77a04SHong Zhang     } else {
259b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
260b2b77a04SHong Zhang     }
261b2b77a04SHong Zhang   }
2629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
265b2b77a04SHong Zhang }
266b2b77a04SHong Zhang 
267d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
268d71ae5a4SJacob Faibussowitsch {
269b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
270b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
271f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
272f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
273c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
274ce94432eSBarry Smith   MPI_Comm           comm;
275b2b77a04SHong Zhang 
276b2b77a04SHong Zhang   PetscFunctionBegin;
2779566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2789566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2799566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2806aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
281b2b77a04SHong Zhang     switch (ndim) {
282b2b77a04SHong Zhang     case 1:
2833c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
284b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
285ae0a50aaSHong Zhang   #else
2864f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2873c3a9c75SAmlan Barua   #endif
288b2b77a04SHong Zhang       break;
289b2b77a04SHong Zhang     case 2:
29097504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
291b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
2923c3a9c75SAmlan Barua   #else
2933c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
2943c3a9c75SAmlan Barua   #endif
295b2b77a04SHong Zhang       break;
296b2b77a04SHong Zhang     case 3:
2973c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
298b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
2993c3a9c75SAmlan Barua   #else
3003c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3013c3a9c75SAmlan Barua   #endif
302b2b77a04SHong Zhang       break;
303b2b77a04SHong Zhang     default:
3043c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
305c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3063c3a9c75SAmlan Barua   #else
3073c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3083c3a9c75SAmlan Barua   #endif
309b2b77a04SHong Zhang       break;
310b2b77a04SHong Zhang     }
311f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
312b2b77a04SHong Zhang     fftw->boutarray = y_array;
3132f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
314b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
315b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
316b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
317b2b77a04SHong Zhang     } else {
3182f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
319b2b77a04SHong Zhang     }
320b2b77a04SHong Zhang   }
3219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
324b2b77a04SHong Zhang }
3250cf2e031SBarry Smith #endif
326b2b77a04SHong Zhang 
327d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A)
328d71ae5a4SJacob Faibussowitsch {
329b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
330b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
331b2b77a04SHong Zhang 
332b2b77a04SHong Zhang   PetscFunctionBegin;
333b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
334b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
335ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3369566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3379566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3382e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3392e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3402e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3410cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3426ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3430cf2e031SBarry Smith #endif
3443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
345b2b77a04SHong Zhang }
346b2b77a04SHong Zhang 
3470cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
348c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
349d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v)
350d71ae5a4SJacob Faibussowitsch {
351b2b77a04SHong Zhang   PetscScalar *array;
352b2b77a04SHong Zhang 
353b2b77a04SHong Zhang   PetscFunctionBegin;
3549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3552f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3579566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
3583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
359b2b77a04SHong Zhang }
3600cf2e031SBarry Smith #endif
361b2b77a04SHong Zhang 
3620cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
363d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
364d71ae5a4SJacob Faibussowitsch {
3655b113e21Ss-sajid-ali   Mat A;
3665b113e21Ss-sajid-ali 
3675b113e21Ss-sajid-ali   PetscFunctionBegin;
3689566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
3699566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
3703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3715b113e21Ss-sajid-ali }
3725b113e21Ss-sajid-ali 
373d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
374d71ae5a4SJacob Faibussowitsch {
3755b113e21Ss-sajid-ali   Mat A;
3765b113e21Ss-sajid-ali 
3775b113e21Ss-sajid-ali   PetscFunctionBegin;
3789566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
3799566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
3803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3815b113e21Ss-sajid-ali }
3825b113e21Ss-sajid-ali 
383d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
384d71ae5a4SJacob Faibussowitsch {
3855b113e21Ss-sajid-ali   Mat A;
3865b113e21Ss-sajid-ali 
3875b113e21Ss-sajid-ali   PetscFunctionBegin;
3889566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
3899566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
3903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3915b113e21Ss-sajid-ali }
3920cf2e031SBarry Smith #endif
3935b113e21Ss-sajid-ali 
3944be45526SHong Zhang /*@
3952a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
39611a5261eSBarry Smith      parallel layout determined by `MATFFTW`
3974f7415efSAmlan Barua 
398c3339decSBarry Smith    Collective
3994f7415efSAmlan Barua 
4004f7415efSAmlan Barua    Input Parameter:
4013564f093SHong Zhang .   A - the matrix
4024f7415efSAmlan Barua 
403d8d19677SJose E. Roman    Output Parameters:
404cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4056b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
406cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4074f7415efSAmlan Barua 
4082ef1f0ffSBarry Smith    Options Database Key:
4092ef1f0ffSBarry Smith .  -mat_fftw_plannerflags - set FFTW planner flags
4102ef1f0ffSBarry Smith 
4114f7415efSAmlan Barua   Level: advanced
4123564f093SHong Zhang 
41311a5261eSBarry Smith   Notes:
41411a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
41597504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
41697504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
41711a5261eSBarry Smith 
41811a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
41997504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
42097504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
421e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
422e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
423e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
42411a5261eSBarry Smith 
425e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
426e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
427e0ec536eSAmlan Barua   each processor and returns that.
4284f7415efSAmlan Barua 
42911a5261eSBarry Smith   Developer Note:
43011a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
43111a5261eSBarry Smith 
432*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4334be45526SHong Zhang @*/
434d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
435d71ae5a4SJacob Faibussowitsch {
4364be45526SHong Zhang   PetscFunctionBegin;
437cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4394be45526SHong Zhang }
4404be45526SHong Zhang 
441d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
442d71ae5a4SJacob Faibussowitsch {
4434f7415efSAmlan Barua   PetscMPIInt size, rank;
444ce94432eSBarry Smith   MPI_Comm    comm;
4454f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4464f7415efSAmlan Barua 
4474f7415efSAmlan Barua   PetscFunctionBegin;
4484f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4494f7415efSAmlan Barua   PetscValidType(A, 1);
4509566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4514f7415efSAmlan Barua 
4529566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4544f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4554f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4569566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4579566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4589566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4594f7415efSAmlan Barua #else
4609566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4619566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4629566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4634f7415efSAmlan Barua #endif
4640cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
46597504071SAmlan Barua   } else { /* parallel cases */
4660cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
4670cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
4684f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
4699496c9aeSAmlan Barua     ptrdiff_t     local_n1;
4709496c9aeSAmlan Barua     fftw_complex *data_fout;
4719496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
4729496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
4739496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
4749496c9aeSAmlan Barua   #else
4754f7415efSAmlan Barua     double   *data_finr, *data_boutr;
4766ccf0b3eSHong Zhang     PetscInt  n1, N1;
4779496c9aeSAmlan Barua     ptrdiff_t temp;
4789496c9aeSAmlan Barua   #endif
4799496c9aeSAmlan Barua 
4804f7415efSAmlan Barua     switch (ndim) {
4814f7415efSAmlan Barua     case 1:
48257625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
4834f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
48457625b84SAmlan Barua   #else
48557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
48657625b84SAmlan Barua       if (fin) {
48757625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
4889566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
4899566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
4905b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
49157625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
49257625b84SAmlan Barua       }
49357625b84SAmlan Barua       if (fout) {
49457625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
4959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
4969566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
4975b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
49857625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
49957625b84SAmlan Barua       }
50057625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
50157625b84SAmlan Barua       if (bout) {
50257625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5039566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5049566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5055b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
50657625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
50757625b84SAmlan Barua       }
50857625b84SAmlan Barua       break;
50957625b84SAmlan Barua   #endif
5104f7415efSAmlan Barua     case 2:
51197504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5124f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5139371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5149371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5154f7415efSAmlan Barua       if (fin) {
5164f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5179566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5189566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5195b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5204f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5214f7415efSAmlan Barua       }
52257625b84SAmlan Barua       if (fout) {
52357625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5249566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5259566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5265b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
52757625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52857625b84SAmlan Barua       }
5295b113e21Ss-sajid-ali       if (bout) {
5305b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5319566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5329566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5335b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5345b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5355b113e21Ss-sajid-ali       }
5364f7415efSAmlan Barua   #else
5374f7415efSAmlan Barua       /* Get local size */
5384f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5394f7415efSAmlan Barua       if (fin) {
5404f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5419566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5429566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5435b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5444f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5454f7415efSAmlan Barua       }
5464f7415efSAmlan Barua       if (fout) {
5474f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5489566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5499566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5505b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5514f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5524f7415efSAmlan Barua       }
5534f7415efSAmlan Barua       if (bout) {
5544f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5559566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5569566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5575b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5584f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5594f7415efSAmlan Barua       }
5604f7415efSAmlan Barua   #endif
5614f7415efSAmlan Barua       break;
5624f7415efSAmlan Barua     case 3:
5634f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5644f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5659371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
5669371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
5674f7415efSAmlan Barua       if (fin) {
5684f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5699566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5709566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5715b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5724f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5734f7415efSAmlan Barua       }
5745b113e21Ss-sajid-ali       if (fout) {
5755b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5769566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
5779566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5785b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5795b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5805b113e21Ss-sajid-ali       }
5814f7415efSAmlan Barua       if (bout) {
5824f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5839566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5849566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5855b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5864f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5874f7415efSAmlan Barua       }
5884f7415efSAmlan Barua   #else
5890c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
5900c9b18e4SAmlan Barua       if (fin) {
5910c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5929566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5939566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5945b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5950c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5960c9b18e4SAmlan Barua       }
5970c9b18e4SAmlan Barua       if (fout) {
5980c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5999566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6009566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6015b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6020c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6030c9b18e4SAmlan Barua       }
6040c9b18e4SAmlan Barua       if (bout) {
6050c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6069566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6079566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6085b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6090c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6100c9b18e4SAmlan Barua       }
6114f7415efSAmlan Barua   #endif
6124f7415efSAmlan Barua       break;
6134f7415efSAmlan Barua     default:
6144f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6154f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6164f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6174f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6180cf2e031SBarry Smith       N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6194f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6204f7415efSAmlan Barua 
6214f7415efSAmlan Barua       if (fin) {
6224f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6249566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6255b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6264f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6274f7415efSAmlan Barua       }
6285b113e21Ss-sajid-ali       if (fout) {
6295b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6309566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6319566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6325b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6335b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6345b113e21Ss-sajid-ali       }
6354f7415efSAmlan Barua       if (bout) {
6364f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6379566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6389566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6395b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6409496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6414f7415efSAmlan Barua       }
6424f7415efSAmlan Barua   #else
6430c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6440c9b18e4SAmlan Barua       if (fin) {
6450c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6469566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6479566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6485b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6490c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6500c9b18e4SAmlan Barua       }
6510c9b18e4SAmlan Barua       if (fout) {
6520c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6539566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6549566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6555b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6560c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6570c9b18e4SAmlan Barua       }
6580c9b18e4SAmlan Barua       if (bout) {
6590c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6609566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6619566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6625b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6630c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6640c9b18e4SAmlan Barua       }
6654f7415efSAmlan Barua   #endif
6664f7415efSAmlan Barua       break;
6674f7415efSAmlan Barua     }
668f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
669f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
670da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
671f9d91177SJunchao Zhang     */
672f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
673f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
674f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
6750cf2e031SBarry Smith #endif
6764f7415efSAmlan Barua   }
6773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6784f7415efSAmlan Barua }
6794f7415efSAmlan Barua 
6803564f093SHong Zhang /*@
68111a5261eSBarry Smith    VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
68254efbe56SHong Zhang 
683c3339decSBarry Smith    Collective
6843564f093SHong Zhang 
6853564f093SHong Zhang    Input Parameters:
6863564f093SHong Zhang +  A - FFTW matrix
6873564f093SHong Zhang -  x - the PETSc vector
6883564f093SHong Zhang 
6892fe279fdSBarry Smith    Output Parameter:
6903564f093SHong Zhang .  y - the FFTW vector
6913564f093SHong Zhang 
692b2b77a04SHong Zhang    Level: intermediate
6933564f093SHong Zhang 
69411a5261eSBarry Smith    Note:
69511a5261eSBarry Smith    For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
69697504071SAmlan Barua    one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
69797504071SAmlan Barua    zeros. This routine does that job by scattering operation.
69897504071SAmlan Barua 
699*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7003564f093SHong Zhang @*/
701d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
702d71ae5a4SJacob Faibussowitsch {
7033564f093SHong Zhang   PetscFunctionBegin;
704cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7063564f093SHong Zhang }
7073c3a9c75SAmlan Barua 
708d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
709d71ae5a4SJacob Faibussowitsch {
710ce94432eSBarry Smith   MPI_Comm    comm;
7113c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7129496c9aeSAmlan Barua   PetscInt    low;
7137a21806cSHong Zhang   PetscMPIInt rank, size;
7147a21806cSHong Zhang   PetscInt    vsize, vsize1;
7153c3a9c75SAmlan Barua   VecScatter  vecscat;
7160cf2e031SBarry Smith   IS          list1;
7173c3a9c75SAmlan Barua 
7183564f093SHong Zhang   PetscFunctionBegin;
7199566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7219566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7229566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7233c3a9c75SAmlan Barua 
7243564f093SHong Zhang   if (size == 1) {
7259566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7269566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7279566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7289566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7299566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7309566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7319566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7329566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7330cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7343564f093SHong Zhang   } else {
7350cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7360cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7370cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7380cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7390cf2e031SBarry Smith     IS        list2;
7400cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7410cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7420cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7430cf2e031SBarry Smith     PetscInt  NM;
7440cf2e031SBarry Smith     ptrdiff_t temp;
7450cf2e031SBarry Smith   #endif
7460cf2e031SBarry Smith 
7473c3a9c75SAmlan Barua     switch (ndim) {
7483c3a9c75SAmlan Barua     case 1:
74964657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7507a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
75126fbe8dcSKarl Rupp 
7529566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7539566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7549566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7559566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7569566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7579566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7589566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7599566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
76064657d84SAmlan Barua   #else
761e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
76264657d84SAmlan Barua   #endif
7633c3a9c75SAmlan Barua       break;
7643c3a9c75SAmlan Barua     case 2:
765bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7667a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
76726fbe8dcSKarl Rupp 
7689566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
7699566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
7709566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7719566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7729566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7739566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7749566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7759566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
776bd59e6c6SAmlan Barua   #else
777c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
77826fbe8dcSKarl Rupp 
7799566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
7809566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
7813c3a9c75SAmlan Barua 
7823564f093SHong Zhang       if (dim[1] % 2 == 0) {
7833c3a9c75SAmlan Barua         NM = dim[1] + 2;
7843564f093SHong Zhang       } else {
7853c3a9c75SAmlan Barua         NM = dim[1] + 1;
7863564f093SHong Zhang       }
7873c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
7883c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
7893c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
7903c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
79126fbe8dcSKarl Rupp 
7925b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
7933c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
7943c3a9c75SAmlan Barua         }
7953c3a9c75SAmlan Barua       }
7963c3a9c75SAmlan Barua 
7979566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
7989566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
7993c3a9c75SAmlan Barua 
8009566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8019566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8029566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8039566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8069566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8079566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
808bd59e6c6SAmlan Barua   #endif
8099496c9aeSAmlan Barua       break;
8103c3a9c75SAmlan Barua 
8113c3a9c75SAmlan Barua     case 3:
812bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8137a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
81426fbe8dcSKarl Rupp 
8159566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8169566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8179566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8189566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8199566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8209566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8219566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8229566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
823bd59e6c6SAmlan Barua   #else
824c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
825f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8267a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
82726fbe8dcSKarl Rupp 
8289566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
8299566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
83051d1eed7SAmlan Barua 
831db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
832db4deed7SKarl Rupp       else NM = dim[2] + 1;
83351d1eed7SAmlan Barua 
83451d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
83551d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
83651d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
83751d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
83851d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
83926fbe8dcSKarl Rupp 
84051d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
84151d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
84251d1eed7SAmlan Barua           }
84351d1eed7SAmlan Barua         }
84451d1eed7SAmlan Barua       }
84551d1eed7SAmlan Barua 
8469566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8479566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8489566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8499566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8509566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8519566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8529566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8539566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8549566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8559566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
856bd59e6c6SAmlan Barua   #endif
8579496c9aeSAmlan Barua       break;
8583c3a9c75SAmlan Barua 
8593c3a9c75SAmlan Barua     default:
860bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8617a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
86226fbe8dcSKarl Rupp 
8639566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
8649566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
8659566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8669566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8679566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8689566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8709566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
871bd59e6c6SAmlan Barua   #else
872c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
873f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
874e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
87526fbe8dcSKarl Rupp 
876e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
87726fbe8dcSKarl Rupp 
8787a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
87926fbe8dcSKarl Rupp 
880e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
881e5d7f247SAmlan Barua 
882e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
883e5d7f247SAmlan Barua 
8849566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
8859566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
886e5d7f247SAmlan Barua 
887db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
888db4deed7SKarl Rupp       else NM = 1;
889e5d7f247SAmlan Barua 
8906971673cSAmlan Barua       j = low;
8913564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
8926971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
8936971673cSAmlan Barua         indx2[i] = j;
89426fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
8956971673cSAmlan Barua         j++;
8966971673cSAmlan Barua       }
8979566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
8989566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
8999566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9009566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9019566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9029566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9059566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9069566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
907bd59e6c6SAmlan Barua   #endif
9089496c9aeSAmlan Barua       break;
9093c3a9c75SAmlan Barua     }
9100cf2e031SBarry Smith #endif
911e81bb599SAmlan Barua   }
9123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9133c3a9c75SAmlan Barua }
9143c3a9c75SAmlan Barua 
9153564f093SHong Zhang /*@
91611a5261eSBarry Smith    VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9173564f093SHong Zhang 
918c3339decSBarry Smith    Collective
9193564f093SHong Zhang 
9203564f093SHong Zhang     Input Parameters:
92111a5261eSBarry Smith +   A - `MATFFTW` matrix
9223564f093SHong Zhang -   x - FFTW vector
9233564f093SHong Zhang 
9242fe279fdSBarry Smith    Output Parameter:
9253564f093SHong Zhang .  y - PETSc vector
9263564f093SHong Zhang 
9273564f093SHong Zhang    Level: intermediate
9283564f093SHong Zhang 
92911a5261eSBarry Smith    Note:
93011a5261eSBarry Smith    While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
93111a5261eSBarry Smith    `VecScatterFFTWToPetsc()` removes those extra zeros.
9323564f093SHong Zhang 
933*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9343564f093SHong Zhang @*/
935d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
936d71ae5a4SJacob Faibussowitsch {
9373c3a9c75SAmlan Barua   PetscFunctionBegin;
938cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9403c3a9c75SAmlan Barua }
94154efbe56SHong Zhang 
942d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
943d71ae5a4SJacob Faibussowitsch {
944ce94432eSBarry Smith   MPI_Comm    comm;
9455b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9469496c9aeSAmlan Barua   PetscInt    low;
9477a21806cSHong Zhang   PetscMPIInt rank, size;
9485b3e41ffSAmlan Barua   VecScatter  vecscat;
9490cf2e031SBarry Smith   IS          list1;
9509496c9aeSAmlan Barua 
9513564f093SHong Zhang   PetscFunctionBegin;
9529566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9549566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9559566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9565b3e41ffSAmlan Barua 
957e81bb599SAmlan Barua   if (size == 1) {
9589566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
9599566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
9609566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9619566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9629566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9639566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
964e81bb599SAmlan Barua 
9650cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9663564f093SHong Zhang   } else {
9670cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
9680cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
9690cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
9700cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
9710cf2e031SBarry Smith     IS        list2;
9720cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
9730cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
9740cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
9750cf2e031SBarry Smith     PetscInt  NM;
9760cf2e031SBarry Smith     ptrdiff_t temp;
9770cf2e031SBarry Smith   #endif
978e81bb599SAmlan Barua     switch (ndim) {
979e81bb599SAmlan Barua     case 1:
98064657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9817a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
98226fbe8dcSKarl Rupp 
9839566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
9849566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
9859566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9869566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9879566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9889566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9899566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9909566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
99164657d84SAmlan Barua   #else
9926a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
99364657d84SAmlan Barua   #endif
9945b3e41ffSAmlan Barua       break;
9955b3e41ffSAmlan Barua     case 2:
996bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9977a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
99826fbe8dcSKarl Rupp 
9999566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
10009566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10019566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10029566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10039566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10049566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10069566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1007bd59e6c6SAmlan Barua   #else
10087a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
100926fbe8dcSKarl Rupp 
10109566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
10119566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
10125b3e41ffSAmlan Barua 
1013db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1014db4deed7SKarl Rupp       else NM = dim[1] + 1;
10155b3e41ffSAmlan Barua 
10165b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10175b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10185b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10195b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
102026fbe8dcSKarl Rupp 
10215b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10225b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10235b3e41ffSAmlan Barua         }
10245b3e41ffSAmlan Barua       }
10255b3e41ffSAmlan Barua 
10269566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10279566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10285b3e41ffSAmlan Barua 
10299566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10309566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10319566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10329566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10339566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10349566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10359566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10369566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1037bd59e6c6SAmlan Barua   #endif
10389496c9aeSAmlan Barua       break;
10395b3e41ffSAmlan Barua     case 3:
1040bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10417a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
104226fbe8dcSKarl Rupp 
10439566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10449566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10459566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10469566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10479566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10489566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10499566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10509566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1051bd59e6c6SAmlan Barua   #else
10527a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
105326fbe8dcSKarl Rupp 
10549566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
10559566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
105651d1eed7SAmlan Barua 
1057db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1058db4deed7SKarl Rupp       else NM = dim[2] + 1;
105951d1eed7SAmlan Barua 
106051d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
106151d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
106251d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
106351d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
106451d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
106526fbe8dcSKarl Rupp 
106651d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
106751d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
106851d1eed7SAmlan Barua           }
106951d1eed7SAmlan Barua         }
107051d1eed7SAmlan Barua       }
107151d1eed7SAmlan Barua 
10729566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
10739566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
107451d1eed7SAmlan Barua 
10759566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10769566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10779566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10789566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10799566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10809566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10819566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10829566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1083bd59e6c6SAmlan Barua   #endif
10849496c9aeSAmlan Barua       break;
10855b3e41ffSAmlan Barua     default:
1086bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10877a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
108826fbe8dcSKarl Rupp 
10899566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
10909566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
10919566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10929566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10939566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10949566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10959566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10969566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1097bd59e6c6SAmlan Barua   #else
1098ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
109926fbe8dcSKarl Rupp 
1100ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
110126fbe8dcSKarl Rupp 
1102c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
110326fbe8dcSKarl Rupp 
1104ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1105ba6e06d0SAmlan Barua 
1106ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1107ba6e06d0SAmlan Barua 
11089566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
11099566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1110ba6e06d0SAmlan Barua 
1111db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1112db4deed7SKarl Rupp       else NM = 1;
1113ba6e06d0SAmlan Barua 
1114ba6e06d0SAmlan Barua       j = low;
11153564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1116ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1117ba6e06d0SAmlan Barua         indx2[i] = j;
11183564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1119ba6e06d0SAmlan Barua         j++;
1120ba6e06d0SAmlan Barua       }
11219566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11229566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1123ba6e06d0SAmlan Barua 
11249566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11259566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11269566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11279566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11289566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11299566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11309566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11319566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1132bd59e6c6SAmlan Barua   #endif
11339496c9aeSAmlan Barua       break;
11345b3e41ffSAmlan Barua     }
11350cf2e031SBarry Smith #endif
1136e81bb599SAmlan Barua   }
11373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11385b3e41ffSAmlan Barua }
11395b3e41ffSAmlan Barua 
1140350f1385SJose E. Roman /*MC
1141350f1385SJose E. Roman   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.
1142350f1385SJose E. Roman 
1143350f1385SJose E. Roman   Level: intermediate
1144350f1385SJose E. Roman 
1145*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1146350f1385SJose E. Roman M*/
1147d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1148d71ae5a4SJacob Faibussowitsch {
1149ce94432eSBarry Smith   MPI_Comm    comm;
1150b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1151b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11520cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11535274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11545274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1155b2b77a04SHong Zhang   PetscBool   flg;
11564f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
115711902ff2SHong Zhang   PetscMPIInt size, rank;
11589496c9aeSAmlan Barua   ptrdiff_t  *pdim;
11599496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11600cf2e031SBarry Smith   PetscInt tot_dim;
11619496c9aeSAmlan Barua #endif
11629496c9aeSAmlan Barua 
1163b2b77a04SHong Zhang   PetscFunctionBegin;
11649566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
11659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11669566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1167c92418ccSAmlan Barua 
11680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11691878d478SAmlan Barua   fftw_mpi_init();
11700cf2e031SBarry Smith #endif
117111902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
117211902ff2SHong Zhang   pdim[0] = dim[0];
11738ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11748ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
11758ca4c034SAmlan Barua #endif
11763564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
11776882372aSHong Zhang     partial_dim *= dim[ctr];
117811902ff2SHong Zhang     pdim[ctr] = dim[ctr];
11798ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1180db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1181db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
11828ca4c034SAmlan Barua #endif
11836882372aSHong Zhang   }
11843c3a9c75SAmlan Barua 
1185b2b77a04SHong Zhang   if (size == 1) {
1186e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11879566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
11880cf2e031SBarry Smith     fft->n = fft->N;
1189e81bb599SAmlan Barua #else
11909566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
11910cf2e031SBarry Smith     fft->n       = tot_dim;
11920cf2e031SBarry Smith #endif
11930cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11940cf2e031SBarry Smith   } else {
11950cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
11960cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
11970cf2e031SBarry Smith     ptrdiff_t temp;
11980cf2e031SBarry Smith     PetscInt  N1;
1199e81bb599SAmlan Barua   #endif
1200e81bb599SAmlan Barua 
1201b2b77a04SHong Zhang     switch (ndim) {
1202b2b77a04SHong Zhang     case 1:
12033c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12043c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1205e5d7f247SAmlan Barua   #else
12067a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12070cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12089566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1209e5d7f247SAmlan Barua   #endif
1210b2b77a04SHong Zhang       break;
1211b2b77a04SHong Zhang     case 2:
12125b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12137a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12140cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12159566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12165b3e41ffSAmlan Barua   #else
1217c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
121826fbe8dcSKarl Rupp 
12190cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12209566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12215b3e41ffSAmlan Barua   #endif
1222b2b77a04SHong Zhang       break;
1223b2b77a04SHong Zhang     case 3:
122451d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12257a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
122626fbe8dcSKarl Rupp 
12270cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12289566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
122951d1eed7SAmlan Barua   #else
1230c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
123126fbe8dcSKarl Rupp 
12320cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12339566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
123451d1eed7SAmlan Barua   #endif
1235b2b77a04SHong Zhang       break;
1236b2b77a04SHong Zhang     default:
1237b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12387a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
123926fbe8dcSKarl Rupp 
12400cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12419566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1242b3a17365SAmlan Barua   #else
1243b3a17365SAmlan Barua       temp = pdim[ndim - 1];
124426fbe8dcSKarl Rupp 
1245b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
124626fbe8dcSKarl Rupp 
1247c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
124826fbe8dcSKarl Rupp 
12490cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12500cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
125126fbe8dcSKarl Rupp 
1252b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
125326fbe8dcSKarl Rupp 
12549566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1255b3a17365SAmlan Barua   #endif
1256b2b77a04SHong Zhang       break;
1257b2b77a04SHong Zhang     }
12580cf2e031SBarry Smith #endif
1259b2b77a04SHong Zhang   }
12602277172eSMark Adams   free(pdim);
12619566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
12624dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1263b2b77a04SHong Zhang   fft->data = (void *)fftw;
1264b2b77a04SHong Zhang 
12650c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1266e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
126726fbe8dcSKarl Rupp 
12689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
12698c1d5ab3SHong Zhang   if (size == 1) {
1270a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1271a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1272a1b6d50cSHong Zhang #else
12738c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1274a1b6d50cSHong Zhang #endif
12758c1d5ab3SHong Zhang   }
127626fbe8dcSKarl Rupp 
1277b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1278c92418ccSAmlan Barua 
1279f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1280f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1281b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1282b2b77a04SHong Zhang 
1283b2b77a04SHong Zhang   if (size == 1) {
1284b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1285b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
12860cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1287b2b77a04SHong Zhang   } else {
1288b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1289b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
12900cf2e031SBarry Smith #endif
1291b2b77a04SHong Zhang   }
1292b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1293b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12944a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
129526fbe8dcSKarl Rupp 
12969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
12979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
12989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1299b2b77a04SHong Zhang 
1300b2b77a04SHong Zhang   /* get runtime options */
1301d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13029566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13035f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1304d0609cedSBarry Smith   PetscOptionsEnd();
13053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1306b2b77a04SHong Zhang }
1307