xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision e8bdd1797048445815e3ddde65485dd6fd6336a7)
1b2b77a04SHong Zhang /*
2b2b77a04SHong Zhang     Provides an interface to the FFTW package.
3c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
4b2b77a04SHong Zhang */
5b2b77a04SHong Zhang 
6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7b2b77a04SHong Zhang EXTERN_C_BEGIN
80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9c6db04a5SJed Brown   #include <fftw3-mpi.h>
100cf2e031SBarry Smith #else
110cf2e031SBarry Smith   #include <fftw3.h>
120cf2e031SBarry Smith #endif
13b2b77a04SHong Zhang EXTERN_C_END
14b2b77a04SHong Zhang 
15b2b77a04SHong Zhang typedef struct {
16b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
17a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
18a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
19a1b6d50cSHong Zhang #else
208c1d5ab3SHong Zhang   fftw_iodim *iodims;
21a1b6d50cSHong Zhang #endif
22e5d7f247SAmlan Barua   PetscInt     partial_dim;
23b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
24b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
25*e8bdd179SBarry Smith   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
26b2b77a04SHong Zhang } Mat_FFTW;
27b2b77a04SHong Zhang 
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
300cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
310cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
320cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
33b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
34b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
35b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
360cf2e031SBarry Smith #endif
37b2b77a04SHong Zhang 
38d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
39d71ae5a4SJacob Faibussowitsch {
40b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
41b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
42f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
43f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
44*e8bdd179SBarry Smith   Vec                xx;
451acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
46a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
47a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
48a1b6d50cSHong Zhang   #else
498c1d5ab3SHong Zhang   fftw_iodim *iodims;
50a1b6d50cSHong Zhang   #endif
511acd80e4SHong Zhang   PetscInt i;
521acd80e4SHong Zhang #endif
531acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
54b2b77a04SHong Zhang 
55b2b77a04SHong Zhang   PetscFunctionBegin;
56*e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
57*e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
58*e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
59*e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
60*e8bdd179SBarry Smith   } else {
619566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
62*e8bdd179SBarry Smith   }
639566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
646aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
65b2b77a04SHong Zhang     switch (ndim) {
66b2b77a04SHong Zhang     case 1:
6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
68b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6958a912c5SAmlan Barua #else
7058a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7158a912c5SAmlan Barua #endif
72b2b77a04SHong Zhang       break;
73b2b77a04SHong Zhang     case 2:
7458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
75b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7658a912c5SAmlan Barua #else
7758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7858a912c5SAmlan Barua #endif
79b2b77a04SHong Zhang       break;
80b2b77a04SHong Zhang     case 3:
8158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
82b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
8358a912c5SAmlan Barua #else
8458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8558a912c5SAmlan Barua #endif
86b2b77a04SHong Zhang       break;
87b2b77a04SHong Zhang     default:
8858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
89a1b6d50cSHong Zhang       iodims = fftw->iodims;
90a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
918c1d5ab3SHong Zhang       if (ndim) {
92a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
938c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
948c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
95a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
968c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
978c1d5ab3SHong Zhang         }
988c1d5ab3SHong Zhang       }
99a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
100a1b6d50cSHong Zhang   #else
101a1b6d50cSHong Zhang       if (ndim) {
102a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
103a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
104a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
105a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
106a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
107a1b6d50cSHong Zhang         }
108a1b6d50cSHong Zhang       }
109a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
110a1b6d50cSHong Zhang   #endif
111a1b6d50cSHong Zhang 
11258a912c5SAmlan Barua #else
113a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
11458a912c5SAmlan Barua #endif
115b2b77a04SHong Zhang       break;
116b2b77a04SHong Zhang     }
117*e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
118*e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
119*e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
120*e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
121*e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
122*e8bdd179SBarry Smith     } else {
123f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
124b2b77a04SHong Zhang       fftw->foutarray = y_array;
125*e8bdd179SBarry Smith     }
126*e8bdd179SBarry Smith   }
127*e8bdd179SBarry Smith 
128b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1297e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
130b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1317e4bc134SDominic Meiser #else
1327e4bc134SDominic Meiser     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1337e4bc134SDominic Meiser #endif
134b2b77a04SHong Zhang   } else {
135b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
136b2b77a04SHong Zhang   }
1379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140b2b77a04SHong Zhang }
141b2b77a04SHong Zhang 
142d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
143d71ae5a4SJacob Faibussowitsch {
144b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
145b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
146f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
147f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
148b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
149*e8bdd179SBarry Smith   Vec                xx;
1501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
151a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
152a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
153a1b6d50cSHong Zhang   #else
154a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
155a1b6d50cSHong Zhang   #endif
1561acd80e4SHong Zhang #endif
157b2b77a04SHong Zhang 
158b2b77a04SHong Zhang   PetscFunctionBegin;
159*e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
160*e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
161*e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
162*e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
163*e8bdd179SBarry Smith   } else {
1649566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
165*e8bdd179SBarry Smith   }
1669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1676aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
168b2b77a04SHong Zhang     switch (ndim) {
169b2b77a04SHong Zhang     case 1:
17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17258a912c5SAmlan Barua #else
173e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17458a912c5SAmlan Barua #endif
175b2b77a04SHong Zhang       break;
176b2b77a04SHong Zhang     case 2:
17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17958a912c5SAmlan Barua #else
180e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18158a912c5SAmlan Barua #endif
182b2b77a04SHong Zhang       break;
183b2b77a04SHong Zhang     case 3:
18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
185b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
18658a912c5SAmlan Barua #else
187e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18858a912c5SAmlan Barua #endif
189b2b77a04SHong Zhang       break;
190b2b77a04SHong Zhang     default:
19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
192a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
193a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
194a1b6d50cSHong Zhang   #else
1958c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196a1b6d50cSHong Zhang   #endif
19758a912c5SAmlan Barua #else
198a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19958a912c5SAmlan Barua #endif
200b2b77a04SHong Zhang       break;
201b2b77a04SHong Zhang     }
202*e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
203*e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
204*e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
205*e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
206*e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
207*e8bdd179SBarry Smith     } else {
208f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
209b2b77a04SHong Zhang       fftw->boutarray = y_array;
210*e8bdd179SBarry Smith     }
211*e8bdd179SBarry Smith   }
212b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2137e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
214b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2157e4bc134SDominic Meiser #else
2167e4bc134SDominic Meiser     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2177e4bc134SDominic Meiser #endif
218b2b77a04SHong Zhang   } else {
2192f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
220b2b77a04SHong Zhang   }
2219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
224b2b77a04SHong Zhang }
225b2b77a04SHong Zhang 
2260cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
227d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
228d71ae5a4SJacob Faibussowitsch {
229b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
230b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
231f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
232f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
233c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
234ce94432eSBarry Smith   MPI_Comm           comm;
235*e8bdd179SBarry Smith   Vec                xx;
236b2b77a04SHong Zhang 
237b2b77a04SHong Zhang   PetscFunctionBegin;
2389566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
239*e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
240*e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
241*e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
242*e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
243*e8bdd179SBarry Smith   } else {
2449566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
245*e8bdd179SBarry Smith   }
2469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2476aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
248b2b77a04SHong Zhang     switch (ndim) {
249b2b77a04SHong Zhang     case 1:
2503c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
251b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
252ae0a50aaSHong Zhang   #else
2534f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2543c3a9c75SAmlan Barua   #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     case 2:
25797504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
258b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2593c3a9c75SAmlan Barua   #else
2603c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2613c3a9c75SAmlan Barua   #endif
262b2b77a04SHong Zhang       break;
263b2b77a04SHong Zhang     case 3:
2643c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
265b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2663c3a9c75SAmlan Barua   #else
26751d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2683c3a9c75SAmlan Barua   #endif
269b2b77a04SHong Zhang       break;
270b2b77a04SHong Zhang     default:
2713c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
272c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2733c3a9c75SAmlan Barua   #else
2743c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2753c3a9c75SAmlan Barua   #endif
276b2b77a04SHong Zhang       break;
277b2b77a04SHong Zhang     }
278*e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
279*e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
280*e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
281*e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
282*e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
283*e8bdd179SBarry Smith     } else {
284f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
285b2b77a04SHong Zhang       fftw->foutarray = y_array;
286*e8bdd179SBarry Smith     }
287*e8bdd179SBarry Smith   }
288b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
289b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
290b2b77a04SHong Zhang   } else {
291b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
292b2b77a04SHong Zhang   }
2939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2949566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
296b2b77a04SHong Zhang }
297b2b77a04SHong Zhang 
298d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
299d71ae5a4SJacob Faibussowitsch {
300b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
301b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
302f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
303f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
304c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
305ce94432eSBarry Smith   MPI_Comm           comm;
306*e8bdd179SBarry Smith   Vec                xx;
307b2b77a04SHong Zhang 
308b2b77a04SHong Zhang   PetscFunctionBegin;
3099566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
310*e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
311*e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
312*e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
313*e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
314*e8bdd179SBarry Smith   } else {
3159566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
316*e8bdd179SBarry Smith   }
3179566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
3186aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
319b2b77a04SHong Zhang     switch (ndim) {
320b2b77a04SHong Zhang     case 1:
3213c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
322b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
323ae0a50aaSHong Zhang   #else
3244f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3253c3a9c75SAmlan Barua   #endif
326b2b77a04SHong Zhang       break;
327b2b77a04SHong Zhang     case 2:
32897504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
329b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3303c3a9c75SAmlan Barua   #else
3313c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3323c3a9c75SAmlan Barua   #endif
333b2b77a04SHong Zhang       break;
334b2b77a04SHong Zhang     case 3:
3353c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
336b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3373c3a9c75SAmlan Barua   #else
3383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3393c3a9c75SAmlan Barua   #endif
340b2b77a04SHong Zhang       break;
341b2b77a04SHong Zhang     default:
3423c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
343c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3443c3a9c75SAmlan Barua   #else
3453c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3463c3a9c75SAmlan Barua   #endif
347b2b77a04SHong Zhang       break;
348b2b77a04SHong Zhang     }
349*e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
350*e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
351*e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
352*e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
353*e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
354*e8bdd179SBarry Smith     } else {
355f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
356b2b77a04SHong Zhang       fftw->boutarray = y_array;
357*e8bdd179SBarry Smith     }
358*e8bdd179SBarry Smith   }
359b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
360b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
361b2b77a04SHong Zhang   } else {
3622f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
363b2b77a04SHong Zhang   }
3649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
367b2b77a04SHong Zhang }
3680cf2e031SBarry Smith #endif
369b2b77a04SHong Zhang 
370d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A)
371d71ae5a4SJacob Faibussowitsch {
372b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
373b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
374b2b77a04SHong Zhang 
375b2b77a04SHong Zhang   PetscFunctionBegin;
376b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
377b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
378ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3799566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3809566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3812e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3822e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3832e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3840cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3856ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3860cf2e031SBarry Smith #endif
3873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
388b2b77a04SHong Zhang }
389b2b77a04SHong Zhang 
3900cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
391c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
392d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v)
393d71ae5a4SJacob Faibussowitsch {
394b2b77a04SHong Zhang   PetscScalar *array;
395b2b77a04SHong Zhang 
396b2b77a04SHong Zhang   PetscFunctionBegin;
3979566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3982f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3999566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
4009566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
4013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
402b2b77a04SHong Zhang }
4030cf2e031SBarry Smith #endif
404b2b77a04SHong Zhang 
4050cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
406d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
407d71ae5a4SJacob Faibussowitsch {
4085b113e21Ss-sajid-ali   Mat A;
4095b113e21Ss-sajid-ali 
4105b113e21Ss-sajid-ali   PetscFunctionBegin;
4119566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
4129566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
4133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4145b113e21Ss-sajid-ali }
4155b113e21Ss-sajid-ali 
416d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
417d71ae5a4SJacob Faibussowitsch {
4185b113e21Ss-sajid-ali   Mat A;
4195b113e21Ss-sajid-ali 
4205b113e21Ss-sajid-ali   PetscFunctionBegin;
4219566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4229566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4245b113e21Ss-sajid-ali }
4255b113e21Ss-sajid-ali 
426d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
427d71ae5a4SJacob Faibussowitsch {
4285b113e21Ss-sajid-ali   Mat A;
4295b113e21Ss-sajid-ali 
4305b113e21Ss-sajid-ali   PetscFunctionBegin;
4319566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4329566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4345b113e21Ss-sajid-ali }
4350cf2e031SBarry Smith #endif
4365b113e21Ss-sajid-ali 
4374be45526SHong Zhang /*@
4382a7a6963SBarry Smith   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
43911a5261eSBarry Smith   parallel layout determined by `MATFFTW`
4404f7415efSAmlan Barua 
441c3339decSBarry Smith   Collective
4424f7415efSAmlan Barua 
4434f7415efSAmlan Barua   Input Parameter:
4443564f093SHong Zhang . A - the matrix
4454f7415efSAmlan Barua 
446d8d19677SJose E. Roman   Output Parameters:
447cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW
4486b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW
449cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW
4504f7415efSAmlan Barua 
4512ef1f0ffSBarry Smith   Options Database Key:
4522ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags
4532ef1f0ffSBarry Smith 
4544f7415efSAmlan Barua   Level: advanced
4553564f093SHong Zhang 
45611a5261eSBarry Smith   Notes:
45711a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
45897504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
45997504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
46011a5261eSBarry Smith 
46111a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
46297504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
46397504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
464e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
465e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
466e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
46711a5261eSBarry Smith 
468e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
469e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
470e0ec536eSAmlan Barua   each processor and returns that.
4714f7415efSAmlan Barua 
472fe59aa6dSJacob Faibussowitsch   Developer Notes:
47311a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
47411a5261eSBarry Smith 
4751cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4764be45526SHong Zhang @*/
477d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
478d71ae5a4SJacob Faibussowitsch {
4794be45526SHong Zhang   PetscFunctionBegin;
480cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4824be45526SHong Zhang }
4834be45526SHong Zhang 
484d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
485d71ae5a4SJacob Faibussowitsch {
4864f7415efSAmlan Barua   PetscMPIInt size, rank;
487ce94432eSBarry Smith   MPI_Comm    comm;
4884f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4894f7415efSAmlan Barua 
4904f7415efSAmlan Barua   PetscFunctionBegin;
4914f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4924f7415efSAmlan Barua   PetscValidType(A, 1);
4939566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4944f7415efSAmlan Barua 
4959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4974f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4984f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4999566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
5009566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
5019566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
5024f7415efSAmlan Barua #else
5039566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
5049566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
5059566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
5064f7415efSAmlan Barua #endif
5070cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50897504071SAmlan Barua   } else { /* parallel cases */
5090cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
5100cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
5114f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
5129496c9aeSAmlan Barua     ptrdiff_t     local_n1;
5139496c9aeSAmlan Barua     fftw_complex *data_fout;
5149496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
5159496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
5169496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
5179496c9aeSAmlan Barua   #else
5184f7415efSAmlan Barua     double   *data_finr, *data_boutr;
5196ccf0b3eSHong Zhang     PetscInt  n1, N1;
5209496c9aeSAmlan Barua     ptrdiff_t temp;
5219496c9aeSAmlan Barua   #endif
5229496c9aeSAmlan Barua 
5234f7415efSAmlan Barua     switch (ndim) {
5244f7415efSAmlan Barua     case 1:
52557625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5264f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
52757625b84SAmlan Barua   #else
52857625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52957625b84SAmlan Barua       if (fin) {
53057625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5319566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
5329566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5335b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
53457625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
53557625b84SAmlan Barua       }
53657625b84SAmlan Barua       if (fout) {
53757625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5389566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
5399566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5405b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
54157625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
54257625b84SAmlan Barua       }
54357625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
54457625b84SAmlan Barua       if (bout) {
54557625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5469566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5479566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5485b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54957625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
55057625b84SAmlan Barua       }
55157625b84SAmlan Barua       break;
55257625b84SAmlan Barua   #endif
5534f7415efSAmlan Barua     case 2:
55497504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5554f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5569371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5579371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5584f7415efSAmlan Barua       if (fin) {
5594f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5609566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5619566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5625b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5634f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5644f7415efSAmlan Barua       }
56557625b84SAmlan Barua       if (fout) {
56657625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5679566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5689566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5695b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
57057625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57157625b84SAmlan Barua       }
5725b113e21Ss-sajid-ali       if (bout) {
5735b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5749566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5759566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5765b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5775b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5785b113e21Ss-sajid-ali       }
5794f7415efSAmlan Barua   #else
5804f7415efSAmlan Barua       /* Get local size */
5814f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5824f7415efSAmlan Barua       if (fin) {
5834f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5849566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5859566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5865b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5874f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5884f7415efSAmlan Barua       }
5894f7415efSAmlan Barua       if (fout) {
5904f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5919566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5929566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5935b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5944f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5954f7415efSAmlan Barua       }
5964f7415efSAmlan Barua       if (bout) {
5974f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5989566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5999566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6005b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6014f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6024f7415efSAmlan Barua       }
6034f7415efSAmlan Barua   #endif
6044f7415efSAmlan Barua       break;
6054f7415efSAmlan Barua     case 3:
6064f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6074f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6089371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
6099371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
6104f7415efSAmlan Barua       if (fin) {
6114f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6129566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
6139566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6145b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6154f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6164f7415efSAmlan Barua       }
6175b113e21Ss-sajid-ali       if (fout) {
6185b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6199566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
6209566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6215b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6225b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6235b113e21Ss-sajid-ali       }
6244f7415efSAmlan Barua       if (bout) {
6254f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6269566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6279566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6285b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6294f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6304f7415efSAmlan Barua       }
6314f7415efSAmlan Barua   #else
6320c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6330c9b18e4SAmlan Barua       if (fin) {
6340c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6359566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6369566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6375b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6380c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6390c9b18e4SAmlan Barua       }
6400c9b18e4SAmlan Barua       if (fout) {
6410c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6429566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6439566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6445b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6450c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6460c9b18e4SAmlan Barua       }
6470c9b18e4SAmlan Barua       if (bout) {
6480c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6499566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6509566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6515b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6520c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6530c9b18e4SAmlan Barua       }
6544f7415efSAmlan Barua   #endif
6554f7415efSAmlan Barua       break;
6564f7415efSAmlan Barua     default:
6574f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6584f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6594f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6604f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
661f4f49eeaSPierre Jolivet       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6624f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6634f7415efSAmlan Barua 
6644f7415efSAmlan Barua       if (fin) {
6654f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6669566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6679566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6685b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6694f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6704f7415efSAmlan Barua       }
6715b113e21Ss-sajid-ali       if (fout) {
6725b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6739566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6749566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6755b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6765b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6775b113e21Ss-sajid-ali       }
6784f7415efSAmlan Barua       if (bout) {
6794f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6809566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6819566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6825b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6839496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6844f7415efSAmlan Barua       }
6854f7415efSAmlan Barua   #else
6860c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6870c9b18e4SAmlan Barua       if (fin) {
6880c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6899566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6909566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6915b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6920c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6930c9b18e4SAmlan Barua       }
6940c9b18e4SAmlan Barua       if (fout) {
6950c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6969566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6979566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6985b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6990c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
7000c9b18e4SAmlan Barua       }
7010c9b18e4SAmlan Barua       if (bout) {
7020c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
7039566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
7049566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
7055b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
7060c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
7070c9b18e4SAmlan Barua       }
7084f7415efSAmlan Barua   #endif
7094f7415efSAmlan Barua       break;
7104f7415efSAmlan Barua     }
711f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
712f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
713da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
714f9d91177SJunchao Zhang     */
715f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
716f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
717f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
7180cf2e031SBarry Smith #endif
7194f7415efSAmlan Barua   }
7203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7214f7415efSAmlan Barua }
7224f7415efSAmlan Barua 
7233564f093SHong Zhang /*@
72411a5261eSBarry Smith   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
72554efbe56SHong Zhang 
726c3339decSBarry Smith   Collective
7273564f093SHong Zhang 
7283564f093SHong Zhang   Input Parameters:
7293564f093SHong Zhang + A - FFTW matrix
7303564f093SHong Zhang - x - the PETSc vector
7313564f093SHong Zhang 
7322fe279fdSBarry Smith   Output Parameter:
7333564f093SHong Zhang . y - the FFTW vector
7343564f093SHong Zhang 
735b2b77a04SHong Zhang   Level: intermediate
7363564f093SHong Zhang 
73711a5261eSBarry Smith   Note:
73811a5261eSBarry Smith   For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73997504071SAmlan Barua   one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
74097504071SAmlan Barua   zeros. This routine does that job by scattering operation.
74197504071SAmlan Barua 
7421cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7433564f093SHong Zhang @*/
744d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
745d71ae5a4SJacob Faibussowitsch {
7463564f093SHong Zhang   PetscFunctionBegin;
747cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7493564f093SHong Zhang }
7503c3a9c75SAmlan Barua 
75166976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
752d71ae5a4SJacob Faibussowitsch {
753ce94432eSBarry Smith   MPI_Comm    comm;
7543c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7559496c9aeSAmlan Barua   PetscInt    low;
7567a21806cSHong Zhang   PetscMPIInt rank, size;
7577a21806cSHong Zhang   PetscInt    vsize, vsize1;
7583c3a9c75SAmlan Barua   VecScatter  vecscat;
7590cf2e031SBarry Smith   IS          list1;
7603c3a9c75SAmlan Barua 
7613564f093SHong Zhang   PetscFunctionBegin;
7629566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7649566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7659566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7663c3a9c75SAmlan Barua 
7673564f093SHong Zhang   if (size == 1) {
7689566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7699566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7709566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7719566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7729566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7739566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7749566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7759566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7773564f093SHong Zhang   } else {
7780cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7790cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7800cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7810cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7820cf2e031SBarry Smith     IS        list2;
7830cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7840cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7850cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7860cf2e031SBarry Smith     PetscInt  NM;
7870cf2e031SBarry Smith     ptrdiff_t temp;
7880cf2e031SBarry Smith   #endif
7890cf2e031SBarry Smith 
7903c3a9c75SAmlan Barua     switch (ndim) {
7913c3a9c75SAmlan Barua     case 1:
79264657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7937a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
79426fbe8dcSKarl Rupp 
7959566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7969566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7979566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7989566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7999566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8009566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8019566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8029566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
80364657d84SAmlan Barua   #else
804e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
80564657d84SAmlan Barua   #endif
8063c3a9c75SAmlan Barua       break;
8073c3a9c75SAmlan Barua     case 2:
808bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8097a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
81026fbe8dcSKarl Rupp 
8119566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
8129566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
8139566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8149566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8159566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8169566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8179566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8189566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
819bd59e6c6SAmlan Barua   #else
820c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
82126fbe8dcSKarl Rupp 
82257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
82357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
8243c3a9c75SAmlan Barua 
8253564f093SHong Zhang       if (dim[1] % 2 == 0) {
8263c3a9c75SAmlan Barua         NM = dim[1] + 2;
8273564f093SHong Zhang       } else {
8283c3a9c75SAmlan Barua         NM = dim[1] + 1;
8293564f093SHong Zhang       }
8303c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
8313c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
8323c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
8333c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
83426fbe8dcSKarl Rupp 
8355b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
8363c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
8373c3a9c75SAmlan Barua         }
8383c3a9c75SAmlan Barua       }
8393c3a9c75SAmlan Barua 
8409566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
8419566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
8423c3a9c75SAmlan Barua 
8439566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8449566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8459566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8469566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8479566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8489566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8499566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8509566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
851bd59e6c6SAmlan Barua   #endif
8529496c9aeSAmlan Barua       break;
8533c3a9c75SAmlan Barua 
8543c3a9c75SAmlan Barua     case 3:
855bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8567a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
85726fbe8dcSKarl Rupp 
8589566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8599566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8609566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8619566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8629566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8639566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8649566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8659566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
866bd59e6c6SAmlan Barua   #else
867c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
868f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8697a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
87026fbe8dcSKarl Rupp 
87157508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
87257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
87351d1eed7SAmlan Barua 
874db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
875db4deed7SKarl Rupp       else NM = dim[2] + 1;
87651d1eed7SAmlan Barua 
87751d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
87851d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
87951d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
88051d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
88151d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
88226fbe8dcSKarl Rupp 
88351d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
88451d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
88551d1eed7SAmlan Barua           }
88651d1eed7SAmlan Barua         }
88751d1eed7SAmlan Barua       }
88851d1eed7SAmlan Barua 
8899566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8909566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8919566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8929566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8939566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8949566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8959566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8969566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8979566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8989566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
899bd59e6c6SAmlan Barua   #endif
9009496c9aeSAmlan Barua       break;
9013c3a9c75SAmlan Barua 
9023c3a9c75SAmlan Barua     default:
903bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9047a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
90526fbe8dcSKarl Rupp 
9069566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
9079566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
9089566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9099566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9109566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9119566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9129566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9139566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
914bd59e6c6SAmlan Barua   #else
915c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
916f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
917e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
91826fbe8dcSKarl Rupp 
919e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
92026fbe8dcSKarl Rupp 
9217a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
92226fbe8dcSKarl Rupp 
923e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
924e5d7f247SAmlan Barua 
925e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
926e5d7f247SAmlan Barua 
92757508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
92857508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
929e5d7f247SAmlan Barua 
930db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
931db4deed7SKarl Rupp       else NM = 1;
932e5d7f247SAmlan Barua 
9336971673cSAmlan Barua       j = low;
93457508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
9356971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
9366971673cSAmlan Barua         indx2[i] = j;
93726fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
9386971673cSAmlan Barua         j++;
9396971673cSAmlan Barua       }
9409566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9419566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9429566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9439566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9449566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9459566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9469566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9479566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9489566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9499566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
950bd59e6c6SAmlan Barua   #endif
9519496c9aeSAmlan Barua       break;
9523c3a9c75SAmlan Barua     }
9530cf2e031SBarry Smith #endif
954e81bb599SAmlan Barua   }
9553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9563c3a9c75SAmlan Barua }
9573c3a9c75SAmlan Barua 
9583564f093SHong Zhang /*@
95911a5261eSBarry Smith   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9603564f093SHong Zhang 
961c3339decSBarry Smith   Collective
9623564f093SHong Zhang 
9633564f093SHong Zhang   Input Parameters:
96411a5261eSBarry Smith + A - `MATFFTW` matrix
9653564f093SHong Zhang - x - FFTW vector
9663564f093SHong Zhang 
9672fe279fdSBarry Smith   Output Parameter:
9683564f093SHong Zhang . y - PETSc vector
9693564f093SHong Zhang 
9703564f093SHong Zhang   Level: intermediate
9713564f093SHong Zhang 
97211a5261eSBarry Smith   Note:
97311a5261eSBarry Smith   While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
97411a5261eSBarry Smith   `VecScatterFFTWToPetsc()` removes those extra zeros.
9753564f093SHong Zhang 
9761cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9773564f093SHong Zhang @*/
978d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
979d71ae5a4SJacob Faibussowitsch {
9803c3a9c75SAmlan Barua   PetscFunctionBegin;
981cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9833c3a9c75SAmlan Barua }
98454efbe56SHong Zhang 
98566976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
986d71ae5a4SJacob Faibussowitsch {
987ce94432eSBarry Smith   MPI_Comm    comm;
9885b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9899496c9aeSAmlan Barua   PetscInt    low;
9907a21806cSHong Zhang   PetscMPIInt rank, size;
9915b3e41ffSAmlan Barua   VecScatter  vecscat;
9920cf2e031SBarry Smith   IS          list1;
9939496c9aeSAmlan Barua 
9943564f093SHong Zhang   PetscFunctionBegin;
9959566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9989566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9995b3e41ffSAmlan Barua 
1000e81bb599SAmlan Barua   if (size == 1) {
10019566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
10029566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
10039566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10049566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10059566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
10069566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
1007e81bb599SAmlan Barua 
10080cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10093564f093SHong Zhang   } else {
10100cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
10110cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
10120cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
10130cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
10140cf2e031SBarry Smith     IS        list2;
10150cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
10160cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
10170cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
10180cf2e031SBarry Smith     PetscInt  NM;
10190cf2e031SBarry Smith     ptrdiff_t temp;
10200cf2e031SBarry Smith   #endif
1021e81bb599SAmlan Barua     switch (ndim) {
1022e81bb599SAmlan Barua     case 1:
102364657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10247a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
102526fbe8dcSKarl Rupp 
10269566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
10279566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
10289566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10299566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10309566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10319566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10329566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10339566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
103464657d84SAmlan Barua   #else
10356a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
103664657d84SAmlan Barua   #endif
10375b3e41ffSAmlan Barua       break;
10385b3e41ffSAmlan Barua     case 2:
1039bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10407a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
104126fbe8dcSKarl Rupp 
10429566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
10439566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10449566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10459566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10469566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10479566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10489566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10499566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1050bd59e6c6SAmlan Barua   #else
10517a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
105226fbe8dcSKarl Rupp 
105357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
105457508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
10555b3e41ffSAmlan Barua 
1056db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1057db4deed7SKarl Rupp       else NM = dim[1] + 1;
10585b3e41ffSAmlan Barua 
10595b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10605b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10615b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10625b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
106326fbe8dcSKarl Rupp 
10645b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10655b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10665b3e41ffSAmlan Barua         }
10675b3e41ffSAmlan Barua       }
10685b3e41ffSAmlan Barua 
10699566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10709566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10715b3e41ffSAmlan Barua 
10729566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10739566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10749566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10759566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10769566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10779566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10789566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10799566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1080bd59e6c6SAmlan Barua   #endif
10819496c9aeSAmlan Barua       break;
10825b3e41ffSAmlan Barua     case 3:
1083bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10847a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
108526fbe8dcSKarl Rupp 
10869566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10879566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10889566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10899566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10909566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10919566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10929566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10939566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1094bd59e6c6SAmlan Barua   #else
10957a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
109626fbe8dcSKarl Rupp 
109757508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
109857508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
109951d1eed7SAmlan Barua 
1100db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1101db4deed7SKarl Rupp       else NM = dim[2] + 1;
110251d1eed7SAmlan Barua 
110351d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
110451d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
110551d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
110651d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
110751d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
110826fbe8dcSKarl Rupp 
110951d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
111051d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
111151d1eed7SAmlan Barua           }
111251d1eed7SAmlan Barua         }
111351d1eed7SAmlan Barua       }
111451d1eed7SAmlan Barua 
11159566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
11169566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
111751d1eed7SAmlan Barua 
11189566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11199566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11209566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11219566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11229566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11239566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11249566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11259566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1126bd59e6c6SAmlan Barua   #endif
11279496c9aeSAmlan Barua       break;
11285b3e41ffSAmlan Barua     default:
1129bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
11307a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
113126fbe8dcSKarl Rupp 
11329566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
11339566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
11349566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11359566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11369566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11379566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11389566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11399566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1140bd59e6c6SAmlan Barua   #else
1141ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
114226fbe8dcSKarl Rupp 
1143ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
114426fbe8dcSKarl Rupp 
1145c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
114626fbe8dcSKarl Rupp 
1147ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1148ba6e06d0SAmlan Barua 
1149ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1150ba6e06d0SAmlan Barua 
115157508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
115257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1153ba6e06d0SAmlan Barua 
1154db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1155db4deed7SKarl Rupp       else NM = 1;
1156ba6e06d0SAmlan Barua 
1157ba6e06d0SAmlan Barua       j = low;
115857508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1159ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1160ba6e06d0SAmlan Barua         indx2[i] = j;
11613564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1162ba6e06d0SAmlan Barua         j++;
1163ba6e06d0SAmlan Barua       }
11649566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11659566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1166ba6e06d0SAmlan Barua 
11679566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11689566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11699566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11709566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11719566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11729566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11739566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11749566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1175bd59e6c6SAmlan Barua   #endif
11769496c9aeSAmlan Barua       break;
11775b3e41ffSAmlan Barua     }
11780cf2e031SBarry Smith #endif
1179e81bb599SAmlan Barua   }
11803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11815b3e41ffSAmlan Barua }
11825b3e41ffSAmlan Barua 
1183350f1385SJose E. Roman /*MC
1184350f1385SJose E. Roman   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.
1185350f1385SJose E. Roman 
1186350f1385SJose E. Roman   Level: intermediate
1187350f1385SJose E. Roman 
11881cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1189350f1385SJose E. Roman M*/
1190d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1191d71ae5a4SJacob Faibussowitsch {
1192ce94432eSBarry Smith   MPI_Comm    comm;
1193b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1194b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11950cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11965274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11975274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1198b2b77a04SHong Zhang   PetscBool   flg;
11994f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
120011902ff2SHong Zhang   PetscMPIInt size, rank;
12019496c9aeSAmlan Barua   ptrdiff_t  *pdim;
12029496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12030cf2e031SBarry Smith   PetscInt tot_dim;
12049496c9aeSAmlan Barua #endif
12059496c9aeSAmlan Barua 
1206b2b77a04SHong Zhang   PetscFunctionBegin;
12079566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
12089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
12099566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1210c92418ccSAmlan Barua 
12110cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12121878d478SAmlan Barua   fftw_mpi_init();
12130cf2e031SBarry Smith #endif
121411902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
121511902ff2SHong Zhang   pdim[0] = dim[0];
12168ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12178ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
12188ca4c034SAmlan Barua #endif
12193564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
12206882372aSHong Zhang     partial_dim *= dim[ctr];
122111902ff2SHong Zhang     pdim[ctr] = dim[ctr];
12228ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1223db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1224db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
12258ca4c034SAmlan Barua #endif
12266882372aSHong Zhang   }
12273c3a9c75SAmlan Barua 
1228b2b77a04SHong Zhang   if (size == 1) {
1229e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12309566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12310cf2e031SBarry Smith     fft->n = fft->N;
1232e81bb599SAmlan Barua #else
12339566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12340cf2e031SBarry Smith     fft->n = tot_dim;
12350cf2e031SBarry Smith #endif
12360cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12370cf2e031SBarry Smith   } else {
12380cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12390cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
12400cf2e031SBarry Smith     ptrdiff_t temp;
12410cf2e031SBarry Smith     PetscInt  N1;
1242e81bb599SAmlan Barua   #endif
1243e81bb599SAmlan Barua 
1244b2b77a04SHong Zhang     switch (ndim) {
1245b2b77a04SHong Zhang     case 1:
12463c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12473c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1248e5d7f247SAmlan Barua   #else
12497a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12500cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12519566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1252e5d7f247SAmlan Barua   #endif
1253b2b77a04SHong Zhang       break;
1254b2b77a04SHong Zhang     case 2:
12555b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12567a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12570cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12589566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12595b3e41ffSAmlan Barua   #else
1260c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
126126fbe8dcSKarl Rupp 
12620cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12639566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12645b3e41ffSAmlan Barua   #endif
1265b2b77a04SHong Zhang       break;
1266b2b77a04SHong Zhang     case 3:
126751d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12687a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
126926fbe8dcSKarl Rupp 
12700cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12719566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
127251d1eed7SAmlan Barua   #else
1273c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
127426fbe8dcSKarl Rupp 
12750cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12769566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
127751d1eed7SAmlan Barua   #endif
1278b2b77a04SHong Zhang       break;
1279b2b77a04SHong Zhang     default:
1280b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12817a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
128226fbe8dcSKarl Rupp 
12830cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12849566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1285b3a17365SAmlan Barua   #else
1286b3a17365SAmlan Barua       temp = pdim[ndim - 1];
128726fbe8dcSKarl Rupp 
1288b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
128926fbe8dcSKarl Rupp 
1290c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
129126fbe8dcSKarl Rupp 
12920cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12930cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
129426fbe8dcSKarl Rupp 
1295b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
129626fbe8dcSKarl Rupp 
12979566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1298b3a17365SAmlan Barua   #endif
1299b2b77a04SHong Zhang       break;
1300b2b77a04SHong Zhang     }
13010cf2e031SBarry Smith #endif
1302b2b77a04SHong Zhang   }
13032277172eSMark Adams   free(pdim);
13049566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
13054dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1306b2b77a04SHong Zhang   fft->data = (void *)fftw;
1307b2b77a04SHong Zhang 
13080c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1309e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
131026fbe8dcSKarl Rupp 
1311f4f49eeaSPierre Jolivet   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
13128c1d5ab3SHong Zhang   if (size == 1) {
1313a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1314a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1315a1b6d50cSHong Zhang #else
13168c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1317a1b6d50cSHong Zhang #endif
13188c1d5ab3SHong Zhang   }
131926fbe8dcSKarl Rupp 
1320b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1321c92418ccSAmlan Barua 
1322f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1323f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1324b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1325b2b77a04SHong Zhang 
1326b2b77a04SHong Zhang   if (size == 1) {
1327b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1328b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13290cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1330b2b77a04SHong Zhang   } else {
1331b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1332b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13330cf2e031SBarry Smith #endif
1334b2b77a04SHong Zhang   }
1335b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1336b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13374a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
133826fbe8dcSKarl Rupp 
13399566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13409566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13419566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1342b2b77a04SHong Zhang 
1343b2b77a04SHong Zhang   /* get runtime options */
1344d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13459566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13465f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1347d0609cedSBarry Smith   PetscOptionsEnd();
13483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1349b2b77a04SHong Zhang }
1350