xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision f4f49eeac7efa77fffa46b7ff95a3ed169f659ed)
1b2b77a04SHong Zhang /*
2b2b77a04SHong Zhang     Provides an interface to the FFTW package.
3c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
4b2b77a04SHong Zhang */
5b2b77a04SHong Zhang 
6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7b2b77a04SHong Zhang EXTERN_C_BEGIN
80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9c6db04a5SJed Brown   #include <fftw3-mpi.h>
100cf2e031SBarry Smith #else
110cf2e031SBarry Smith   #include <fftw3.h>
120cf2e031SBarry Smith #endif
13b2b77a04SHong Zhang EXTERN_C_END
14b2b77a04SHong Zhang 
15b2b77a04SHong Zhang typedef struct {
16b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
17a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
18a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
19a1b6d50cSHong Zhang #else
208c1d5ab3SHong Zhang   fftw_iodim *iodims;
21a1b6d50cSHong Zhang #endif
22e5d7f247SAmlan Barua   PetscInt     partial_dim;
23b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
24b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25da81f932SPierre Jolivet   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be
26b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
27b2b77a04SHong Zhang } Mat_FFTW;
28b2b77a04SHong Zhang 
29b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
310cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
320cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
330cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
34b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
35b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
36b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
370cf2e031SBarry Smith #endif
38b2b77a04SHong Zhang 
39d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
40d71ae5a4SJacob Faibussowitsch {
41b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
42b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
43f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
44f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
451acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
46a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
47a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
48a1b6d50cSHong Zhang   #else
498c1d5ab3SHong Zhang   fftw_iodim *iodims;
50a1b6d50cSHong Zhang   #endif
511acd80e4SHong Zhang   PetscInt i;
521acd80e4SHong Zhang #endif
531acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
54b2b77a04SHong Zhang 
55b2b77a04SHong Zhang   PetscFunctionBegin;
569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
586aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
59b2b77a04SHong Zhang     switch (ndim) {
60b2b77a04SHong Zhang     case 1:
6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
62b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6358a912c5SAmlan Barua #else
6458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
6558a912c5SAmlan Barua #endif
66b2b77a04SHong Zhang       break;
67b2b77a04SHong Zhang     case 2:
6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
69b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7058a912c5SAmlan Barua #else
7158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7258a912c5SAmlan Barua #endif
73b2b77a04SHong Zhang       break;
74b2b77a04SHong Zhang     case 3:
7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
76b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7758a912c5SAmlan Barua #else
7858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7958a912c5SAmlan Barua #endif
80b2b77a04SHong Zhang       break;
81b2b77a04SHong Zhang     default:
8258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
83a1b6d50cSHong Zhang       iodims = fftw->iodims;
84a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
858c1d5ab3SHong Zhang       if (ndim) {
86a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
878c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
888c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
89a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
908c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
918c1d5ab3SHong Zhang         }
928c1d5ab3SHong Zhang       }
93a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
94a1b6d50cSHong Zhang   #else
95a1b6d50cSHong Zhang       if (ndim) {
96a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
97a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
98a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
99a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
100a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
101a1b6d50cSHong Zhang         }
102a1b6d50cSHong Zhang       }
103a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104a1b6d50cSHong Zhang   #endif
105a1b6d50cSHong Zhang 
10658a912c5SAmlan Barua #else
107a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
10858a912c5SAmlan Barua #endif
109b2b77a04SHong Zhang       break;
110b2b77a04SHong Zhang     }
111f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
112b2b77a04SHong Zhang     fftw->foutarray = y_array;
113b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
114b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
115b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
116b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
117b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1187e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
119b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1207e4bc134SDominic Meiser #else
1217e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1227e4bc134SDominic Meiser #endif
123b2b77a04SHong Zhang     } else {
124b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
125b2b77a04SHong Zhang     }
126b2b77a04SHong Zhang   }
1279566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1289566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
130b2b77a04SHong Zhang }
131b2b77a04SHong Zhang 
132d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
133d71ae5a4SJacob Faibussowitsch {
134b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
135b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
136f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
137f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
138b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
1391acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
140a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
141a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
142a1b6d50cSHong Zhang   #else
143a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
144a1b6d50cSHong Zhang   #endif
1451acd80e4SHong Zhang #endif
146b2b77a04SHong Zhang 
147b2b77a04SHong Zhang   PetscFunctionBegin;
1489566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
1499566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1506aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
151b2b77a04SHong Zhang     switch (ndim) {
152b2b77a04SHong Zhang     case 1:
15358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
154b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
15558a912c5SAmlan Barua #else
156e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
15758a912c5SAmlan Barua #endif
158b2b77a04SHong Zhang       break;
159b2b77a04SHong Zhang     case 2:
16058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
161b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16258a912c5SAmlan Barua #else
163e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
16458a912c5SAmlan Barua #endif
165b2b77a04SHong Zhang       break;
166b2b77a04SHong Zhang     case 3:
16758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16958a912c5SAmlan Barua #else
170e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17158a912c5SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     default:
17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
176a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
177a1b6d50cSHong Zhang   #else
1788c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
179a1b6d50cSHong Zhang   #endif
18058a912c5SAmlan Barua #else
181a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18258a912c5SAmlan Barua #endif
183b2b77a04SHong Zhang       break;
184b2b77a04SHong Zhang     }
185f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
186b2b77a04SHong Zhang     fftw->boutarray = y_array;
1872f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
188b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
189b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
1907e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
191b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1927e4bc134SDominic Meiser #else
1937e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
1947e4bc134SDominic Meiser #endif
195b2b77a04SHong Zhang     } else {
1962f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
197b2b77a04SHong Zhang     }
198b2b77a04SHong Zhang   }
1999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
202b2b77a04SHong Zhang }
203b2b77a04SHong Zhang 
2040cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
205d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
206d71ae5a4SJacob Faibussowitsch {
207b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
208b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
209f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
210f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
211c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
212ce94432eSBarry Smith   MPI_Comm           comm;
213b2b77a04SHong Zhang 
214b2b77a04SHong Zhang   PetscFunctionBegin;
2159566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2169566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2186aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
219b2b77a04SHong Zhang     switch (ndim) {
220b2b77a04SHong Zhang     case 1:
2213c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
222b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
223ae0a50aaSHong Zhang   #else
2244f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2253c3a9c75SAmlan Barua   #endif
226b2b77a04SHong Zhang       break;
227b2b77a04SHong Zhang     case 2:
22897504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
229b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2303c3a9c75SAmlan Barua   #else
2313c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2323c3a9c75SAmlan Barua   #endif
233b2b77a04SHong Zhang       break;
234b2b77a04SHong Zhang     case 3:
2353c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
236b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2373c3a9c75SAmlan Barua   #else
23851d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2393c3a9c75SAmlan Barua   #endif
240b2b77a04SHong Zhang       break;
241b2b77a04SHong Zhang     default:
2423c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
243c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2443c3a9c75SAmlan Barua   #else
2453c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2463c3a9c75SAmlan Barua   #endif
247b2b77a04SHong Zhang       break;
248b2b77a04SHong Zhang     }
249f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
250b2b77a04SHong Zhang     fftw->foutarray = y_array;
251b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
252b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
253b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
254b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
255b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
256b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
257b2b77a04SHong Zhang     } else {
258b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
259b2b77a04SHong Zhang     }
260b2b77a04SHong Zhang   }
2619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
264b2b77a04SHong Zhang }
265b2b77a04SHong Zhang 
266d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
267d71ae5a4SJacob Faibussowitsch {
268b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
269b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
270f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
271f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
272c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
273ce94432eSBarry Smith   MPI_Comm           comm;
274b2b77a04SHong Zhang 
275b2b77a04SHong Zhang   PetscFunctionBegin;
2769566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2789566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2796aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
280b2b77a04SHong Zhang     switch (ndim) {
281b2b77a04SHong Zhang     case 1:
2823c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
283b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
284ae0a50aaSHong Zhang   #else
2854f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2863c3a9c75SAmlan Barua   #endif
287b2b77a04SHong Zhang       break;
288b2b77a04SHong Zhang     case 2:
28997504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
290b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
2913c3a9c75SAmlan Barua   #else
2923c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
2933c3a9c75SAmlan Barua   #endif
294b2b77a04SHong Zhang       break;
295b2b77a04SHong Zhang     case 3:
2963c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
297b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
2983c3a9c75SAmlan Barua   #else
2993c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3003c3a9c75SAmlan Barua   #endif
301b2b77a04SHong Zhang       break;
302b2b77a04SHong Zhang     default:
3033c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
304c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3053c3a9c75SAmlan Barua   #else
3063c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3073c3a9c75SAmlan Barua   #endif
308b2b77a04SHong Zhang       break;
309b2b77a04SHong Zhang     }
310f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
311b2b77a04SHong Zhang     fftw->boutarray = y_array;
3122f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
313b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
314b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
315b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
316b2b77a04SHong Zhang     } else {
3172f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
318b2b77a04SHong Zhang     }
319b2b77a04SHong Zhang   }
3209566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
323b2b77a04SHong Zhang }
3240cf2e031SBarry Smith #endif
325b2b77a04SHong Zhang 
326d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A)
327d71ae5a4SJacob Faibussowitsch {
328b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
329b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
330b2b77a04SHong Zhang 
331b2b77a04SHong Zhang   PetscFunctionBegin;
332b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
333b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
334ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3359566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3369566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3372e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3382e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3392e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3400cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3416ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3420cf2e031SBarry Smith #endif
3433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
344b2b77a04SHong Zhang }
345b2b77a04SHong Zhang 
3460cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
347c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
348d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v)
349d71ae5a4SJacob Faibussowitsch {
350b2b77a04SHong Zhang   PetscScalar *array;
351b2b77a04SHong Zhang 
352b2b77a04SHong Zhang   PetscFunctionBegin;
3539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3542f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3569566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
3573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
358b2b77a04SHong Zhang }
3590cf2e031SBarry Smith #endif
360b2b77a04SHong Zhang 
3610cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
362d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
363d71ae5a4SJacob Faibussowitsch {
3645b113e21Ss-sajid-ali   Mat A;
3655b113e21Ss-sajid-ali 
3665b113e21Ss-sajid-ali   PetscFunctionBegin;
3679566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
3689566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
3693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3705b113e21Ss-sajid-ali }
3715b113e21Ss-sajid-ali 
372d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
373d71ae5a4SJacob Faibussowitsch {
3745b113e21Ss-sajid-ali   Mat A;
3755b113e21Ss-sajid-ali 
3765b113e21Ss-sajid-ali   PetscFunctionBegin;
3779566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
3789566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
3793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3805b113e21Ss-sajid-ali }
3815b113e21Ss-sajid-ali 
382d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
383d71ae5a4SJacob Faibussowitsch {
3845b113e21Ss-sajid-ali   Mat A;
3855b113e21Ss-sajid-ali 
3865b113e21Ss-sajid-ali   PetscFunctionBegin;
3879566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
3889566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
3893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3905b113e21Ss-sajid-ali }
3910cf2e031SBarry Smith #endif
3925b113e21Ss-sajid-ali 
3934be45526SHong Zhang /*@
3942a7a6963SBarry Smith   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
39511a5261eSBarry Smith   parallel layout determined by `MATFFTW`
3964f7415efSAmlan Barua 
397c3339decSBarry Smith   Collective
3984f7415efSAmlan Barua 
3994f7415efSAmlan Barua   Input Parameter:
4003564f093SHong Zhang . A - the matrix
4014f7415efSAmlan Barua 
402d8d19677SJose E. Roman   Output Parameters:
403cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW
4046b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW
405cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW
4064f7415efSAmlan Barua 
4072ef1f0ffSBarry Smith   Options Database Key:
4082ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags
4092ef1f0ffSBarry Smith 
4104f7415efSAmlan Barua   Level: advanced
4113564f093SHong Zhang 
41211a5261eSBarry Smith   Notes:
41311a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
41497504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
41597504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
41611a5261eSBarry Smith 
41711a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
41897504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
41997504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
420e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
421e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
422e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
42311a5261eSBarry Smith 
424e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
425e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
426e0ec536eSAmlan Barua   each processor and returns that.
4274f7415efSAmlan Barua 
428fe59aa6dSJacob Faibussowitsch   Developer Notes:
42911a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
43011a5261eSBarry Smith 
4311cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4324be45526SHong Zhang @*/
433d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
434d71ae5a4SJacob Faibussowitsch {
4354be45526SHong Zhang   PetscFunctionBegin;
436cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4384be45526SHong Zhang }
4394be45526SHong Zhang 
440d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
441d71ae5a4SJacob Faibussowitsch {
4424f7415efSAmlan Barua   PetscMPIInt size, rank;
443ce94432eSBarry Smith   MPI_Comm    comm;
4444f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4454f7415efSAmlan Barua 
4464f7415efSAmlan Barua   PetscFunctionBegin;
4474f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4484f7415efSAmlan Barua   PetscValidType(A, 1);
4499566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4504f7415efSAmlan Barua 
4519566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4529566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4534f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4544f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4559566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4569566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4579566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4584f7415efSAmlan Barua #else
4599566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4609566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4619566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4624f7415efSAmlan Barua #endif
4630cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
46497504071SAmlan Barua   } else { /* parallel cases */
4650cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
4660cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
4674f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
4689496c9aeSAmlan Barua     ptrdiff_t     local_n1;
4699496c9aeSAmlan Barua     fftw_complex *data_fout;
4709496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
4719496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
4729496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
4739496c9aeSAmlan Barua   #else
4744f7415efSAmlan Barua     double   *data_finr, *data_boutr;
4756ccf0b3eSHong Zhang     PetscInt  n1, N1;
4769496c9aeSAmlan Barua     ptrdiff_t temp;
4779496c9aeSAmlan Barua   #endif
4789496c9aeSAmlan Barua 
4794f7415efSAmlan Barua     switch (ndim) {
4804f7415efSAmlan Barua     case 1:
48157625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
4824f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
48357625b84SAmlan Barua   #else
48457625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
48557625b84SAmlan Barua       if (fin) {
48657625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
4879566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
4889566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
4895b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
49057625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
49157625b84SAmlan Barua       }
49257625b84SAmlan Barua       if (fout) {
49357625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
4949566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
4959566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
4965b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
49757625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
49857625b84SAmlan Barua       }
49957625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
50057625b84SAmlan Barua       if (bout) {
50157625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5029566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5039566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5045b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
50557625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
50657625b84SAmlan Barua       }
50757625b84SAmlan Barua       break;
50857625b84SAmlan Barua   #endif
5094f7415efSAmlan Barua     case 2:
51097504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5114f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5129371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5139371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5144f7415efSAmlan Barua       if (fin) {
5154f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5169566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5179566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5185b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5194f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5204f7415efSAmlan Barua       }
52157625b84SAmlan Barua       if (fout) {
52257625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5249566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5255b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
52657625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52757625b84SAmlan Barua       }
5285b113e21Ss-sajid-ali       if (bout) {
5295b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5309566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5319566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5325b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5335b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5345b113e21Ss-sajid-ali       }
5354f7415efSAmlan Barua   #else
5364f7415efSAmlan Barua       /* Get local size */
5374f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5384f7415efSAmlan Barua       if (fin) {
5394f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5409566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5419566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5425b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5434f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5444f7415efSAmlan Barua       }
5454f7415efSAmlan Barua       if (fout) {
5464f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5479566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5489566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5495b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5504f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5514f7415efSAmlan Barua       }
5524f7415efSAmlan Barua       if (bout) {
5534f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5549566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5559566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5565b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5574f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5584f7415efSAmlan Barua       }
5594f7415efSAmlan Barua   #endif
5604f7415efSAmlan Barua       break;
5614f7415efSAmlan Barua     case 3:
5624f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5634f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5649371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
5659371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
5664f7415efSAmlan Barua       if (fin) {
5674f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5689566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5699566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5705b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5714f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5724f7415efSAmlan Barua       }
5735b113e21Ss-sajid-ali       if (fout) {
5745b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5759566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
5769566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5775b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5785b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5795b113e21Ss-sajid-ali       }
5804f7415efSAmlan Barua       if (bout) {
5814f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5829566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5839566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5845b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5854f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5864f7415efSAmlan Barua       }
5874f7415efSAmlan Barua   #else
5880c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
5890c9b18e4SAmlan Barua       if (fin) {
5900c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5919566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5929566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5935b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5940c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5950c9b18e4SAmlan Barua       }
5960c9b18e4SAmlan Barua       if (fout) {
5970c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5989566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5999566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6005b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6010c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6020c9b18e4SAmlan Barua       }
6030c9b18e4SAmlan Barua       if (bout) {
6040c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6059566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6069566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6075b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6080c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6090c9b18e4SAmlan Barua       }
6104f7415efSAmlan Barua   #endif
6114f7415efSAmlan Barua       break;
6124f7415efSAmlan Barua     default:
6134f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6144f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6154f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6164f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
617*f4f49eeaSPierre Jolivet       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6184f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6194f7415efSAmlan Barua 
6204f7415efSAmlan Barua       if (fin) {
6214f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6229566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6239566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6245b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6254f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6264f7415efSAmlan Barua       }
6275b113e21Ss-sajid-ali       if (fout) {
6285b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6299566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6309566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6315b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6325b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6335b113e21Ss-sajid-ali       }
6344f7415efSAmlan Barua       if (bout) {
6354f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6369566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6379566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6385b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6399496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6404f7415efSAmlan Barua       }
6414f7415efSAmlan Barua   #else
6420c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6430c9b18e4SAmlan Barua       if (fin) {
6440c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6459566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6469566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6475b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6480c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6490c9b18e4SAmlan Barua       }
6500c9b18e4SAmlan Barua       if (fout) {
6510c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6529566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6539566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6545b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6550c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6560c9b18e4SAmlan Barua       }
6570c9b18e4SAmlan Barua       if (bout) {
6580c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6599566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6609566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6615b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6620c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6630c9b18e4SAmlan Barua       }
6644f7415efSAmlan Barua   #endif
6654f7415efSAmlan Barua       break;
6664f7415efSAmlan Barua     }
667f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
668f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
669da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
670f9d91177SJunchao Zhang     */
671f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
672f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
673f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
6740cf2e031SBarry Smith #endif
6754f7415efSAmlan Barua   }
6763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6774f7415efSAmlan Barua }
6784f7415efSAmlan Barua 
6793564f093SHong Zhang /*@
68011a5261eSBarry Smith   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
68154efbe56SHong Zhang 
682c3339decSBarry Smith   Collective
6833564f093SHong Zhang 
6843564f093SHong Zhang   Input Parameters:
6853564f093SHong Zhang + A - FFTW matrix
6863564f093SHong Zhang - x - the PETSc vector
6873564f093SHong Zhang 
6882fe279fdSBarry Smith   Output Parameter:
6893564f093SHong Zhang . y - the FFTW vector
6903564f093SHong Zhang 
691b2b77a04SHong Zhang   Level: intermediate
6923564f093SHong Zhang 
69311a5261eSBarry Smith   Note:
69411a5261eSBarry Smith   For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
69597504071SAmlan Barua   one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
69697504071SAmlan Barua   zeros. This routine does that job by scattering operation.
69797504071SAmlan Barua 
6981cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
6993564f093SHong Zhang @*/
700d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
701d71ae5a4SJacob Faibussowitsch {
7023564f093SHong Zhang   PetscFunctionBegin;
703cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7053564f093SHong Zhang }
7063c3a9c75SAmlan Barua 
70766976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
708d71ae5a4SJacob Faibussowitsch {
709ce94432eSBarry Smith   MPI_Comm    comm;
7103c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7119496c9aeSAmlan Barua   PetscInt    low;
7127a21806cSHong Zhang   PetscMPIInt rank, size;
7137a21806cSHong Zhang   PetscInt    vsize, vsize1;
7143c3a9c75SAmlan Barua   VecScatter  vecscat;
7150cf2e031SBarry Smith   IS          list1;
7163c3a9c75SAmlan Barua 
7173564f093SHong Zhang   PetscFunctionBegin;
7189566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7199566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7219566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7223c3a9c75SAmlan Barua 
7233564f093SHong Zhang   if (size == 1) {
7249566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7259566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7269566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7279566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7289566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7299566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7309566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7319566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7320cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7333564f093SHong Zhang   } else {
7340cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7350cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7360cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7370cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7380cf2e031SBarry Smith     IS        list2;
7390cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7400cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7410cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7420cf2e031SBarry Smith     PetscInt  NM;
7430cf2e031SBarry Smith     ptrdiff_t temp;
7440cf2e031SBarry Smith   #endif
7450cf2e031SBarry Smith 
7463c3a9c75SAmlan Barua     switch (ndim) {
7473c3a9c75SAmlan Barua     case 1:
74864657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7497a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
75026fbe8dcSKarl Rupp 
7519566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7529566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7539566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7549566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7559566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7569566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7579566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7589566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
75964657d84SAmlan Barua   #else
760e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
76164657d84SAmlan Barua   #endif
7623c3a9c75SAmlan Barua       break;
7633c3a9c75SAmlan Barua     case 2:
764bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7657a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
76626fbe8dcSKarl Rupp 
7679566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
7689566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
7699566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7709566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7719566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7729566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7739566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7749566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
775bd59e6c6SAmlan Barua   #else
776c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
77726fbe8dcSKarl Rupp 
7789566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
7799566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
7803c3a9c75SAmlan Barua 
7813564f093SHong Zhang       if (dim[1] % 2 == 0) {
7823c3a9c75SAmlan Barua         NM = dim[1] + 2;
7833564f093SHong Zhang       } else {
7843c3a9c75SAmlan Barua         NM = dim[1] + 1;
7853564f093SHong Zhang       }
7863c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
7873c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
7883c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
7893c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
79026fbe8dcSKarl Rupp 
7915b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
7923c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
7933c3a9c75SAmlan Barua         }
7943c3a9c75SAmlan Barua       }
7953c3a9c75SAmlan Barua 
7969566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
7979566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
7983c3a9c75SAmlan Barua 
7999566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8009566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8019566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8029566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8059566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8069566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
807bd59e6c6SAmlan Barua   #endif
8089496c9aeSAmlan Barua       break;
8093c3a9c75SAmlan Barua 
8103c3a9c75SAmlan Barua     case 3:
811bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8127a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
81326fbe8dcSKarl Rupp 
8149566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8159566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8169566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8179566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8189566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8199566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8209566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8219566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
822bd59e6c6SAmlan Barua   #else
823c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
824f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8257a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
82626fbe8dcSKarl Rupp 
8279566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
8289566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
82951d1eed7SAmlan Barua 
830db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
831db4deed7SKarl Rupp       else NM = dim[2] + 1;
83251d1eed7SAmlan Barua 
83351d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
83451d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
83551d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
83651d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
83751d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
83826fbe8dcSKarl Rupp 
83951d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
84051d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
84151d1eed7SAmlan Barua           }
84251d1eed7SAmlan Barua         }
84351d1eed7SAmlan Barua       }
84451d1eed7SAmlan Barua 
8459566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8469566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8479566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8489566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8499566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8509566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8519566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8529566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8539566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8549566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
855bd59e6c6SAmlan Barua   #endif
8569496c9aeSAmlan Barua       break;
8573c3a9c75SAmlan Barua 
8583c3a9c75SAmlan Barua     default:
859bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8607a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
86126fbe8dcSKarl Rupp 
8629566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
8639566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
8649566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8659566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8669566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8679566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8689566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
870bd59e6c6SAmlan Barua   #else
871c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
872f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
873e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
87426fbe8dcSKarl Rupp 
875e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
87626fbe8dcSKarl Rupp 
8777a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
87826fbe8dcSKarl Rupp 
879e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
880e5d7f247SAmlan Barua 
881e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
882e5d7f247SAmlan Barua 
8839566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
8849566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
885e5d7f247SAmlan Barua 
886db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
887db4deed7SKarl Rupp       else NM = 1;
888e5d7f247SAmlan Barua 
8896971673cSAmlan Barua       j = low;
8903564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
8916971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
8926971673cSAmlan Barua         indx2[i] = j;
89326fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
8946971673cSAmlan Barua         j++;
8956971673cSAmlan Barua       }
8969566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
8979566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
8989566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8999566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9009566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9019566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9029566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9049566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9059566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
906bd59e6c6SAmlan Barua   #endif
9079496c9aeSAmlan Barua       break;
9083c3a9c75SAmlan Barua     }
9090cf2e031SBarry Smith #endif
910e81bb599SAmlan Barua   }
9113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9123c3a9c75SAmlan Barua }
9133c3a9c75SAmlan Barua 
9143564f093SHong Zhang /*@
91511a5261eSBarry Smith   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9163564f093SHong Zhang 
917c3339decSBarry Smith   Collective
9183564f093SHong Zhang 
9193564f093SHong Zhang   Input Parameters:
92011a5261eSBarry Smith + A - `MATFFTW` matrix
9213564f093SHong Zhang - x - FFTW vector
9223564f093SHong Zhang 
9232fe279fdSBarry Smith   Output Parameter:
9243564f093SHong Zhang . y - PETSc vector
9253564f093SHong Zhang 
9263564f093SHong Zhang   Level: intermediate
9273564f093SHong Zhang 
92811a5261eSBarry Smith   Note:
92911a5261eSBarry Smith   While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
93011a5261eSBarry Smith   `VecScatterFFTWToPetsc()` removes those extra zeros.
9313564f093SHong Zhang 
9321cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9333564f093SHong Zhang @*/
934d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
935d71ae5a4SJacob Faibussowitsch {
9363c3a9c75SAmlan Barua   PetscFunctionBegin;
937cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9393c3a9c75SAmlan Barua }
94054efbe56SHong Zhang 
94166976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
942d71ae5a4SJacob Faibussowitsch {
943ce94432eSBarry Smith   MPI_Comm    comm;
9445b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9459496c9aeSAmlan Barua   PetscInt    low;
9467a21806cSHong Zhang   PetscMPIInt rank, size;
9475b3e41ffSAmlan Barua   VecScatter  vecscat;
9480cf2e031SBarry Smith   IS          list1;
9499496c9aeSAmlan Barua 
9503564f093SHong Zhang   PetscFunctionBegin;
9519566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9529566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9539566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9549566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9555b3e41ffSAmlan Barua 
956e81bb599SAmlan Barua   if (size == 1) {
9579566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
9589566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
9599566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9609566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9619566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9629566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
963e81bb599SAmlan Barua 
9640cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9653564f093SHong Zhang   } else {
9660cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
9670cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
9680cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
9690cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
9700cf2e031SBarry Smith     IS        list2;
9710cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
9720cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
9730cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
9740cf2e031SBarry Smith     PetscInt  NM;
9750cf2e031SBarry Smith     ptrdiff_t temp;
9760cf2e031SBarry Smith   #endif
977e81bb599SAmlan Barua     switch (ndim) {
978e81bb599SAmlan Barua     case 1:
97964657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9807a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
98126fbe8dcSKarl Rupp 
9829566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
9839566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
9849566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9859566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9869566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9879566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9889566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9899566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
99064657d84SAmlan Barua   #else
9916a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
99264657d84SAmlan Barua   #endif
9935b3e41ffSAmlan Barua       break;
9945b3e41ffSAmlan Barua     case 2:
995bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9967a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
99726fbe8dcSKarl Rupp 
9989566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
9999566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10009566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10019566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10029566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10039566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1006bd59e6c6SAmlan Barua   #else
10077a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
100826fbe8dcSKarl Rupp 
10099566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
10109566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
10115b3e41ffSAmlan Barua 
1012db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1013db4deed7SKarl Rupp       else NM = dim[1] + 1;
10145b3e41ffSAmlan Barua 
10155b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10165b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10175b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10185b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
101926fbe8dcSKarl Rupp 
10205b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10215b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10225b3e41ffSAmlan Barua         }
10235b3e41ffSAmlan Barua       }
10245b3e41ffSAmlan Barua 
10259566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10269566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10275b3e41ffSAmlan Barua 
10289566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10299566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10309566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10319566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10329566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10339566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10349566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10359566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1036bd59e6c6SAmlan Barua   #endif
10379496c9aeSAmlan Barua       break;
10385b3e41ffSAmlan Barua     case 3:
1039bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10407a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
104126fbe8dcSKarl Rupp 
10429566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10439566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10449566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10459566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10469566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10479566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10489566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10499566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1050bd59e6c6SAmlan Barua   #else
10517a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
105226fbe8dcSKarl Rupp 
10539566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
10549566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
105551d1eed7SAmlan Barua 
1056db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1057db4deed7SKarl Rupp       else NM = dim[2] + 1;
105851d1eed7SAmlan Barua 
105951d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
106051d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
106151d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
106251d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
106351d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
106426fbe8dcSKarl Rupp 
106551d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
106651d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
106751d1eed7SAmlan Barua           }
106851d1eed7SAmlan Barua         }
106951d1eed7SAmlan Barua       }
107051d1eed7SAmlan Barua 
10719566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
10729566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
107351d1eed7SAmlan Barua 
10749566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10759566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10769566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10779566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10789566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10799566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10809566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10819566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1082bd59e6c6SAmlan Barua   #endif
10839496c9aeSAmlan Barua       break;
10845b3e41ffSAmlan Barua     default:
1085bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10867a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
108726fbe8dcSKarl Rupp 
10889566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
10899566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
10909566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10919566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10929566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10939566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10949566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10959566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1096bd59e6c6SAmlan Barua   #else
1097ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
109826fbe8dcSKarl Rupp 
1099ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
110026fbe8dcSKarl Rupp 
1101c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
110226fbe8dcSKarl Rupp 
1103ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1104ba6e06d0SAmlan Barua 
1105ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1106ba6e06d0SAmlan Barua 
11079566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
11089566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1109ba6e06d0SAmlan Barua 
1110db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1111db4deed7SKarl Rupp       else NM = 1;
1112ba6e06d0SAmlan Barua 
1113ba6e06d0SAmlan Barua       j = low;
11143564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1115ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1116ba6e06d0SAmlan Barua         indx2[i] = j;
11173564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1118ba6e06d0SAmlan Barua         j++;
1119ba6e06d0SAmlan Barua       }
11209566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11219566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1122ba6e06d0SAmlan Barua 
11239566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11249566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11259566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11269566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11279566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11289566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11299566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11309566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1131bd59e6c6SAmlan Barua   #endif
11329496c9aeSAmlan Barua       break;
11335b3e41ffSAmlan Barua     }
11340cf2e031SBarry Smith #endif
1135e81bb599SAmlan Barua   }
11363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11375b3e41ffSAmlan Barua }
11385b3e41ffSAmlan Barua 
1139350f1385SJose E. Roman /*MC
1140350f1385SJose E. Roman   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.
1141350f1385SJose E. Roman 
1142350f1385SJose E. Roman   Level: intermediate
1143350f1385SJose E. Roman 
11441cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1145350f1385SJose E. Roman M*/
1146d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1147d71ae5a4SJacob Faibussowitsch {
1148ce94432eSBarry Smith   MPI_Comm    comm;
1149b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1150b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11510cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11525274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11535274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1154b2b77a04SHong Zhang   PetscBool   flg;
11554f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
115611902ff2SHong Zhang   PetscMPIInt size, rank;
11579496c9aeSAmlan Barua   ptrdiff_t  *pdim;
11589496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11590cf2e031SBarry Smith   PetscInt tot_dim;
11609496c9aeSAmlan Barua #endif
11619496c9aeSAmlan Barua 
1162b2b77a04SHong Zhang   PetscFunctionBegin;
11639566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
11649566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1166c92418ccSAmlan Barua 
11670cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11681878d478SAmlan Barua   fftw_mpi_init();
11690cf2e031SBarry Smith #endif
117011902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
117111902ff2SHong Zhang   pdim[0] = dim[0];
11728ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11738ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
11748ca4c034SAmlan Barua #endif
11753564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
11766882372aSHong Zhang     partial_dim *= dim[ctr];
117711902ff2SHong Zhang     pdim[ctr] = dim[ctr];
11788ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1179db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1180db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
11818ca4c034SAmlan Barua #endif
11826882372aSHong Zhang   }
11833c3a9c75SAmlan Barua 
1184b2b77a04SHong Zhang   if (size == 1) {
1185e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11869566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
11870cf2e031SBarry Smith     fft->n = fft->N;
1188e81bb599SAmlan Barua #else
11899566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
11900cf2e031SBarry Smith     fft->n       = tot_dim;
11910cf2e031SBarry Smith #endif
11920cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11930cf2e031SBarry Smith   } else {
11940cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
11950cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
11960cf2e031SBarry Smith     ptrdiff_t temp;
11970cf2e031SBarry Smith     PetscInt  N1;
1198e81bb599SAmlan Barua   #endif
1199e81bb599SAmlan Barua 
1200b2b77a04SHong Zhang     switch (ndim) {
1201b2b77a04SHong Zhang     case 1:
12023c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12033c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1204e5d7f247SAmlan Barua   #else
12057a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12060cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12079566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1208e5d7f247SAmlan Barua   #endif
1209b2b77a04SHong Zhang       break;
1210b2b77a04SHong Zhang     case 2:
12115b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12127a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12130cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12149566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12155b3e41ffSAmlan Barua   #else
1216c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
121726fbe8dcSKarl Rupp 
12180cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12199566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12205b3e41ffSAmlan Barua   #endif
1221b2b77a04SHong Zhang       break;
1222b2b77a04SHong Zhang     case 3:
122351d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12247a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
122526fbe8dcSKarl Rupp 
12260cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12279566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
122851d1eed7SAmlan Barua   #else
1229c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
123026fbe8dcSKarl Rupp 
12310cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12329566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
123351d1eed7SAmlan Barua   #endif
1234b2b77a04SHong Zhang       break;
1235b2b77a04SHong Zhang     default:
1236b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12377a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
123826fbe8dcSKarl Rupp 
12390cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12409566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1241b3a17365SAmlan Barua   #else
1242b3a17365SAmlan Barua       temp = pdim[ndim - 1];
124326fbe8dcSKarl Rupp 
1244b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
124526fbe8dcSKarl Rupp 
1246c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
124726fbe8dcSKarl Rupp 
12480cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12490cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
125026fbe8dcSKarl Rupp 
1251b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
125226fbe8dcSKarl Rupp 
12539566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1254b3a17365SAmlan Barua   #endif
1255b2b77a04SHong Zhang       break;
1256b2b77a04SHong Zhang     }
12570cf2e031SBarry Smith #endif
1258b2b77a04SHong Zhang   }
12592277172eSMark Adams   free(pdim);
12609566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
12614dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1262b2b77a04SHong Zhang   fft->data = (void *)fftw;
1263b2b77a04SHong Zhang 
12640c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1265e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
126626fbe8dcSKarl Rupp 
1267*f4f49eeaSPierre Jolivet   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
12688c1d5ab3SHong Zhang   if (size == 1) {
1269a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1270a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1271a1b6d50cSHong Zhang #else
12728c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1273a1b6d50cSHong Zhang #endif
12748c1d5ab3SHong Zhang   }
127526fbe8dcSKarl Rupp 
1276b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1277c92418ccSAmlan Barua 
1278f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1279f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1280b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1281b2b77a04SHong Zhang 
1282b2b77a04SHong Zhang   if (size == 1) {
1283b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1284b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
12850cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1286b2b77a04SHong Zhang   } else {
1287b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1288b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
12890cf2e031SBarry Smith #endif
1290b2b77a04SHong Zhang   }
1291b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1292b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
12934a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
129426fbe8dcSKarl Rupp 
12959566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
12969566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
12979566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1298b2b77a04SHong Zhang 
1299b2b77a04SHong Zhang   /* get runtime options */
1300d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13019566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13025f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1303d0609cedSBarry Smith   PetscOptionsEnd();
13043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1305b2b77a04SHong Zhang }
1306