xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 2ef1f0ff6e3530e8731eb06ad663081f5844f49f)
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10c6db04a5SJed Brown   #include <fftw3-mpi.h>
110cf2e031SBarry Smith #else
120cf2e031SBarry Smith   #include <fftw3.h>
130cf2e031SBarry Smith #endif
14b2b77a04SHong Zhang EXTERN_C_END
15b2b77a04SHong Zhang 
16b2b77a04SHong Zhang typedef struct {
17b9ae062cSHong Zhang   ptrdiff_t ndim_fftw, *dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26da81f932SPierre Jolivet   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be
27b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
28b2b77a04SHong Zhang } Mat_FFTW;
29b2b77a04SHong Zhang 
30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat);
330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
380cf2e031SBarry Smith #endif
39b2b77a04SHong Zhang 
400cf2e031SBarry Smith /*
410cf2e031SBarry Smith    MatMult_SeqFFTW performs forward DFT
4297504071SAmlan Barua    Input parameter:
433564f093SHong Zhang      A - the matrix
4497504071SAmlan Barua      x - the vector on which FDFT will be performed
4597504071SAmlan Barua 
4697504071SAmlan Barua    Output parameter:
4797504071SAmlan Barua      y - vector that stores result of FDFT
4897504071SAmlan Barua */
49d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
50d71ae5a4SJacob Faibussowitsch {
51b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
52b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
53f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
54f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
551acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
56a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
57a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
58a1b6d50cSHong Zhang   #else
598c1d5ab3SHong Zhang   fftw_iodim *iodims;
60a1b6d50cSHong Zhang   #endif
611acd80e4SHong Zhang   PetscInt i;
621acd80e4SHong Zhang #endif
631acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
64b2b77a04SHong Zhang 
65b2b77a04SHong Zhang   PetscFunctionBegin;
669566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
686aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
69b2b77a04SHong Zhang     switch (ndim) {
70b2b77a04SHong Zhang     case 1:
7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
72b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7358a912c5SAmlan Barua #else
7458a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7558a912c5SAmlan Barua #endif
76b2b77a04SHong Zhang       break;
77b2b77a04SHong Zhang     case 2:
7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
79b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
8058a912c5SAmlan Barua #else
8158a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8258a912c5SAmlan Barua #endif
83b2b77a04SHong Zhang       break;
84b2b77a04SHong Zhang     case 3:
8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
86b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
8758a912c5SAmlan Barua #else
8858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
8958a912c5SAmlan Barua #endif
90b2b77a04SHong Zhang       break;
91b2b77a04SHong Zhang     default:
9258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
93a1b6d50cSHong Zhang       iodims = fftw->iodims;
94a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
958c1d5ab3SHong Zhang       if (ndim) {
96a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
978c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
988c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
99a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
1008c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
1018c1d5ab3SHong Zhang         }
1028c1d5ab3SHong Zhang       }
103a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
104a1b6d50cSHong Zhang   #else
105a1b6d50cSHong Zhang       if (ndim) {
106a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
107a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
108a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
109a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
110a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
111a1b6d50cSHong Zhang         }
112a1b6d50cSHong Zhang       }
113a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
114a1b6d50cSHong Zhang   #endif
115a1b6d50cSHong Zhang 
11658a912c5SAmlan Barua #else
117a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
11858a912c5SAmlan Barua #endif
119b2b77a04SHong Zhang       break;
120b2b77a04SHong Zhang     }
121f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
122b2b77a04SHong Zhang     fftw->foutarray = y_array;
123b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
124b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
125b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
126b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
127b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1287e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
129b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1307e4bc134SDominic Meiser #else
1317e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1327e4bc134SDominic Meiser #endif
133b2b77a04SHong Zhang     } else {
134b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
135b2b77a04SHong Zhang     }
136b2b77a04SHong Zhang   }
1379566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140b2b77a04SHong Zhang }
141b2b77a04SHong Zhang 
14297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
14397504071SAmlan Barua    Input parameter:
1443564f093SHong Zhang      A - the matrix
14597504071SAmlan Barua      x - the vector on which BDFT will be performed
14697504071SAmlan Barua 
14797504071SAmlan Barua    Output parameter:
14897504071SAmlan Barua      y - vector that stores result of BDFT
14997504071SAmlan Barua */
15097504071SAmlan Barua 
151d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
152d71ae5a4SJacob Faibussowitsch {
153b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
154b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
155f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
156f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
157b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
1581acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
159a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
160a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
161a1b6d50cSHong Zhang   #else
162a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
163a1b6d50cSHong Zhang   #endif
1641acd80e4SHong Zhang #endif
165b2b77a04SHong Zhang 
166b2b77a04SHong Zhang   PetscFunctionBegin;
1679566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
1689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1696aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
170b2b77a04SHong Zhang     switch (ndim) {
171b2b77a04SHong Zhang     case 1:
17258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
173b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17458a912c5SAmlan Barua #else
175e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17658a912c5SAmlan Barua #endif
177b2b77a04SHong Zhang       break;
178b2b77a04SHong Zhang     case 2:
17958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
180b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
18158a912c5SAmlan Barua #else
182e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18358a912c5SAmlan Barua #endif
184b2b77a04SHong Zhang       break;
185b2b77a04SHong Zhang     case 3:
18658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
187b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
18858a912c5SAmlan Barua #else
189e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19058a912c5SAmlan Barua #endif
191b2b77a04SHong Zhang       break;
192b2b77a04SHong Zhang     default:
19358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
194a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
195a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
196a1b6d50cSHong Zhang   #else
1978c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
198a1b6d50cSHong Zhang   #endif
19958a912c5SAmlan Barua #else
200a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
20158a912c5SAmlan Barua #endif
202b2b77a04SHong Zhang       break;
203b2b77a04SHong Zhang     }
204f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
205b2b77a04SHong Zhang     fftw->boutarray = y_array;
2062f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
207b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
208b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2097e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
210b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2117e4bc134SDominic Meiser #else
2127e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2137e4bc134SDominic Meiser #endif
214b2b77a04SHong Zhang     } else {
2152f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
216b2b77a04SHong Zhang     }
217b2b77a04SHong Zhang   }
2189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2199566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
221b2b77a04SHong Zhang }
222b2b77a04SHong Zhang 
2230cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
22497504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
22597504071SAmlan Barua    Input parameter:
2263564f093SHong Zhang      A - the matrix
22797504071SAmlan Barua      x - the vector on which FDFT will be performed
22897504071SAmlan Barua 
22997504071SAmlan Barua    Output parameter:
23097504071SAmlan Barua    y   - vector that stores result of FDFT
23197504071SAmlan Barua */
232d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
233d71ae5a4SJacob Faibussowitsch {
234b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
235b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
236f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
237f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
238c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
239ce94432eSBarry Smith   MPI_Comm           comm;
240b2b77a04SHong Zhang 
241b2b77a04SHong Zhang   PetscFunctionBegin;
2429566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
2449566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2456aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
246b2b77a04SHong Zhang     switch (ndim) {
247b2b77a04SHong Zhang     case 1:
2483c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
249b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
250ae0a50aaSHong Zhang   #else
2514f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2523c3a9c75SAmlan Barua   #endif
253b2b77a04SHong Zhang       break;
254b2b77a04SHong Zhang     case 2:
25597504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2573c3a9c75SAmlan Barua   #else
2583c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2593c3a9c75SAmlan Barua   #endif
260b2b77a04SHong Zhang       break;
261b2b77a04SHong Zhang     case 3:
2623c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
263b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2643c3a9c75SAmlan Barua   #else
26551d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2663c3a9c75SAmlan Barua   #endif
267b2b77a04SHong Zhang       break;
268b2b77a04SHong Zhang     default:
2693c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
270c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2713c3a9c75SAmlan Barua   #else
2723c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2733c3a9c75SAmlan Barua   #endif
274b2b77a04SHong Zhang       break;
275b2b77a04SHong Zhang     }
276f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *)x_array;
277b2b77a04SHong Zhang     fftw->foutarray = y_array;
278b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
279b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
281b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
282b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
284b2b77a04SHong Zhang     } else {
285b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
286b2b77a04SHong Zhang     }
287b2b77a04SHong Zhang   }
2889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2899566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
291b2b77a04SHong Zhang }
292b2b77a04SHong Zhang 
2930cf2e031SBarry Smith /*
2940cf2e031SBarry Smith    MatMultTranspose_MPIFFTW performs parallel backward DFT
29597504071SAmlan Barua    Input parameter:
2963564f093SHong Zhang      A - the matrix
29797504071SAmlan Barua      x - the vector on which BDFT will be performed
29897504071SAmlan Barua 
29997504071SAmlan Barua    Output parameter:
30097504071SAmlan Barua      y - vector that stores result of BDFT
30197504071SAmlan Barua */
302d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
303d71ae5a4SJacob Faibussowitsch {
304b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
305b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
306f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
307f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
308c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
309ce94432eSBarry Smith   MPI_Comm           comm;
310b2b77a04SHong Zhang 
311b2b77a04SHong Zhang   PetscFunctionBegin;
3129566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
3139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(x, &x_array));
3149566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
3156aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
316b2b77a04SHong Zhang     switch (ndim) {
317b2b77a04SHong Zhang     case 1:
3183c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
319b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
320ae0a50aaSHong Zhang   #else
3214f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3223c3a9c75SAmlan Barua   #endif
323b2b77a04SHong Zhang       break;
324b2b77a04SHong Zhang     case 2:
32597504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
326b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3273c3a9c75SAmlan Barua   #else
3283c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3293c3a9c75SAmlan Barua   #endif
330b2b77a04SHong Zhang       break;
331b2b77a04SHong Zhang     case 3:
3323c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
333b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3343c3a9c75SAmlan Barua   #else
3353c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3363c3a9c75SAmlan Barua   #endif
337b2b77a04SHong Zhang       break;
338b2b77a04SHong Zhang     default:
3393c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
340c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3413c3a9c75SAmlan Barua   #else
3423c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3433c3a9c75SAmlan Barua   #endif
344b2b77a04SHong Zhang       break;
345b2b77a04SHong Zhang     }
346f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *)x_array;
347b2b77a04SHong Zhang     fftw->boutarray = y_array;
3482f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
349b2b77a04SHong Zhang   } else {                                                         /* use existing plan */
350b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
351b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
352b2b77a04SHong Zhang     } else {
3532f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
354b2b77a04SHong Zhang     }
355b2b77a04SHong Zhang   }
3569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
359b2b77a04SHong Zhang }
3600cf2e031SBarry Smith #endif
361b2b77a04SHong Zhang 
362d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A)
363d71ae5a4SJacob Faibussowitsch {
364b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
365b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
366b2b77a04SHong Zhang 
367b2b77a04SHong Zhang   PetscFunctionBegin;
368b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
369b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
370ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3719566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3732e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3742e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3776ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3780cf2e031SBarry Smith #endif
3793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
380b2b77a04SHong Zhang }
381b2b77a04SHong Zhang 
3820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
383c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
384d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v)
385d71ae5a4SJacob Faibussowitsch {
386b2b77a04SHong Zhang   PetscScalar *array;
387b2b77a04SHong Zhang 
388b2b77a04SHong Zhang   PetscFunctionBegin;
3899566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3902f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3929566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
3933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
394b2b77a04SHong Zhang }
3950cf2e031SBarry Smith #endif
396b2b77a04SHong Zhang 
3970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
398d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399d71ae5a4SJacob Faibussowitsch {
4005b113e21Ss-sajid-ali   Mat A;
4015b113e21Ss-sajid-ali 
4025b113e21Ss-sajid-ali   PetscFunctionBegin;
4039566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
4049566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
4053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4065b113e21Ss-sajid-ali }
4075b113e21Ss-sajid-ali 
408d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409d71ae5a4SJacob Faibussowitsch {
4105b113e21Ss-sajid-ali   Mat A;
4115b113e21Ss-sajid-ali 
4125b113e21Ss-sajid-ali   PetscFunctionBegin;
4139566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4149566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4165b113e21Ss-sajid-ali }
4175b113e21Ss-sajid-ali 
418d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419d71ae5a4SJacob Faibussowitsch {
4205b113e21Ss-sajid-ali   Mat A;
4215b113e21Ss-sajid-ali 
4225b113e21Ss-sajid-ali   PetscFunctionBegin;
4239566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4249566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4265b113e21Ss-sajid-ali }
4270cf2e031SBarry Smith #endif
4285b113e21Ss-sajid-ali 
4294be45526SHong Zhang /*@
4302a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
43111a5261eSBarry Smith      parallel layout determined by `MATFFTW`
4324f7415efSAmlan Barua 
433c3339decSBarry Smith    Collective
4344f7415efSAmlan Barua 
4354f7415efSAmlan Barua    Input Parameter:
4363564f093SHong Zhang .   A - the matrix
4374f7415efSAmlan Barua 
438d8d19677SJose E. Roman    Output Parameters:
439cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4406b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
441cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4424f7415efSAmlan Barua 
443*2ef1f0ffSBarry Smith    Options Database Key:
444*2ef1f0ffSBarry Smith .  -mat_fftw_plannerflags - set FFTW planner flags
445*2ef1f0ffSBarry Smith 
4464f7415efSAmlan Barua   Level: advanced
4473564f093SHong Zhang 
44811a5261eSBarry Smith   Notes:
44911a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
45097504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
45197504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
45211a5261eSBarry Smith 
45311a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
45497504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45597504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
456e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
457e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
458e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
45911a5261eSBarry Smith 
460e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
461e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
462e0ec536eSAmlan Barua   each processor and returns that.
4634f7415efSAmlan Barua 
46411a5261eSBarry Smith   Developer Note:
46511a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
46611a5261eSBarry Smith 
467*2ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4684be45526SHong Zhang @*/
469d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
470d71ae5a4SJacob Faibussowitsch {
4714be45526SHong Zhang   PetscFunctionBegin;
472cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4744be45526SHong Zhang }
4754be45526SHong Zhang 
476d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477d71ae5a4SJacob Faibussowitsch {
4784f7415efSAmlan Barua   PetscMPIInt size, rank;
479ce94432eSBarry Smith   MPI_Comm    comm;
4804f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4814f7415efSAmlan Barua 
4824f7415efSAmlan Barua   PetscFunctionBegin;
4834f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4844f7415efSAmlan Barua   PetscValidType(A, 1);
4859566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4864f7415efSAmlan Barua 
4879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4894f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4904f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4919566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4929566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4939566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4944f7415efSAmlan Barua #else
4959566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4969566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4979566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4984f7415efSAmlan Barua #endif
4990cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50097504071SAmlan Barua   } else { /* parallel cases */
5010cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
5020cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
5034f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
5049496c9aeSAmlan Barua     ptrdiff_t     local_n1;
5059496c9aeSAmlan Barua     fftw_complex *data_fout;
5069496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
5079496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
5089496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
5099496c9aeSAmlan Barua   #else
5104f7415efSAmlan Barua     double   *data_finr, *data_boutr;
5116ccf0b3eSHong Zhang     PetscInt  n1, N1;
5129496c9aeSAmlan Barua     ptrdiff_t temp;
5139496c9aeSAmlan Barua   #endif
5149496c9aeSAmlan Barua 
5154f7415efSAmlan Barua     switch (ndim) {
5164f7415efSAmlan Barua     case 1:
51757625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5184f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
51957625b84SAmlan Barua   #else
52057625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52157625b84SAmlan Barua       if (fin) {
52257625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5239566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
5249566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5255b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
52657625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
52757625b84SAmlan Barua       }
52857625b84SAmlan Barua       if (fout) {
52957625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5309566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
5319566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5325b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
53357625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
53457625b84SAmlan Barua       }
53557625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
53657625b84SAmlan Barua       if (bout) {
53757625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5389566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
5399566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5405b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54157625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
54257625b84SAmlan Barua       }
54357625b84SAmlan Barua       break;
54457625b84SAmlan Barua   #endif
5454f7415efSAmlan Barua     case 2:
54697504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5474f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
5489371c9d4SSatish Balay       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
5499371c9d4SSatish Balay       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
5504f7415efSAmlan Barua       if (fin) {
5514f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5529566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5539566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5545b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5554f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5564f7415efSAmlan Barua       }
55757625b84SAmlan Barua       if (fout) {
55857625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5599566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5609566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5615b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
56257625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
56357625b84SAmlan Barua       }
5645b113e21Ss-sajid-ali       if (bout) {
5655b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5669566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5679566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5685b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5695b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5705b113e21Ss-sajid-ali       }
5714f7415efSAmlan Barua   #else
5724f7415efSAmlan Barua       /* Get local size */
5734f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5744f7415efSAmlan Barua       if (fin) {
5754f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5769566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5779566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5785b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5794f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5804f7415efSAmlan Barua       }
5814f7415efSAmlan Barua       if (fout) {
5824f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5839566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5849566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5855b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5864f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5874f7415efSAmlan Barua       }
5884f7415efSAmlan Barua       if (bout) {
5894f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5909566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5919566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5925b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5934f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua       }
5954f7415efSAmlan Barua   #endif
5964f7415efSAmlan Barua       break;
5974f7415efSAmlan Barua     case 3:
5984f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5994f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6009371c9d4SSatish Balay       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
6019371c9d4SSatish Balay       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
6024f7415efSAmlan Barua       if (fin) {
6034f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6049566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
6059566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6065b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6074f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6084f7415efSAmlan Barua       }
6095b113e21Ss-sajid-ali       if (fout) {
6105b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6119566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
6129566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6135b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6145b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6155b113e21Ss-sajid-ali       }
6164f7415efSAmlan Barua       if (bout) {
6174f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6189566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6199566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6205b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6214f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6224f7415efSAmlan Barua       }
6234f7415efSAmlan Barua   #else
6240c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6250c9b18e4SAmlan Barua       if (fin) {
6260c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6279566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6289566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6295b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6300c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6310c9b18e4SAmlan Barua       }
6320c9b18e4SAmlan Barua       if (fout) {
6330c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6349566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6359566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6365b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6370c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6380c9b18e4SAmlan Barua       }
6390c9b18e4SAmlan Barua       if (bout) {
6400c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6419566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6429566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6435b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6440c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6450c9b18e4SAmlan Barua       }
6464f7415efSAmlan Barua   #endif
6474f7415efSAmlan Barua       break;
6484f7415efSAmlan Barua     default:
6494f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6504f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6514f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6524f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
6530cf2e031SBarry Smith       N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6544f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6554f7415efSAmlan Barua 
6564f7415efSAmlan Barua       if (fin) {
6574f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6589566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6599566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6605b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6614f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6624f7415efSAmlan Barua       }
6635b113e21Ss-sajid-ali       if (fout) {
6645b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6659566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6669566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6675b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6685b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6695b113e21Ss-sajid-ali       }
6704f7415efSAmlan Barua       if (bout) {
6714f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6729566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6739566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6745b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6759496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6764f7415efSAmlan Barua       }
6774f7415efSAmlan Barua   #else
6780c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6790c9b18e4SAmlan Barua       if (fin) {
6800c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6819566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6829566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6835b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6840c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6850c9b18e4SAmlan Barua       }
6860c9b18e4SAmlan Barua       if (fout) {
6870c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6889566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6899566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6905b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6910c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6920c9b18e4SAmlan Barua       }
6930c9b18e4SAmlan Barua       if (bout) {
6940c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6969566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6975b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6980c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6990c9b18e4SAmlan Barua       }
7004f7415efSAmlan Barua   #endif
7014f7415efSAmlan Barua       break;
7024f7415efSAmlan Barua     }
703f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
704f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
705da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
706f9d91177SJunchao Zhang     */
707f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
708f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
709f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
7100cf2e031SBarry Smith #endif
7114f7415efSAmlan Barua   }
7123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7134f7415efSAmlan Barua }
7144f7415efSAmlan Barua 
7153564f093SHong Zhang /*@
71611a5261eSBarry Smith    VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
71754efbe56SHong Zhang 
718c3339decSBarry Smith    Collective
7193564f093SHong Zhang 
7203564f093SHong Zhang    Input Parameters:
7213564f093SHong Zhang +  A - FFTW matrix
7223564f093SHong Zhang -  x - the PETSc vector
7233564f093SHong Zhang 
7243564f093SHong Zhang    Output Parameters:
7253564f093SHong Zhang .  y - the FFTW vector
7263564f093SHong Zhang 
727b2b77a04SHong Zhang    Level: intermediate
7283564f093SHong Zhang 
72911a5261eSBarry Smith    Note:
73011a5261eSBarry Smith    For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73197504071SAmlan Barua    one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
73297504071SAmlan Barua    zeros. This routine does that job by scattering operation.
73397504071SAmlan Barua 
734*2ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7353564f093SHong Zhang @*/
736d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737d71ae5a4SJacob Faibussowitsch {
7383564f093SHong Zhang   PetscFunctionBegin;
739cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7413564f093SHong Zhang }
7423c3a9c75SAmlan Barua 
743d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744d71ae5a4SJacob Faibussowitsch {
745ce94432eSBarry Smith   MPI_Comm    comm;
7463c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7479496c9aeSAmlan Barua   PetscInt    low;
7487a21806cSHong Zhang   PetscMPIInt rank, size;
7497a21806cSHong Zhang   PetscInt    vsize, vsize1;
7503c3a9c75SAmlan Barua   VecScatter  vecscat;
7510cf2e031SBarry Smith   IS          list1;
7523c3a9c75SAmlan Barua 
7533564f093SHong Zhang   PetscFunctionBegin;
7549566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7559566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7569566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7579566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7583c3a9c75SAmlan Barua 
7593564f093SHong Zhang   if (size == 1) {
7609566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7619566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7629566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7639566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7649566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7659566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7669566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7679566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7693564f093SHong Zhang   } else {
7700cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
7710cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
7720cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7730cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7740cf2e031SBarry Smith     IS        list2;
7750cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7760cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7770cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7780cf2e031SBarry Smith     PetscInt  NM;
7790cf2e031SBarry Smith     ptrdiff_t temp;
7800cf2e031SBarry Smith   #endif
7810cf2e031SBarry Smith 
7823c3a9c75SAmlan Barua     switch (ndim) {
7833c3a9c75SAmlan Barua     case 1:
78464657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7857a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
78626fbe8dcSKarl Rupp 
7879566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
7889566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
7899566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
7909566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7919566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7929566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
7939566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
7949566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
79564657d84SAmlan Barua   #else
796e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
79764657d84SAmlan Barua   #endif
7983c3a9c75SAmlan Barua       break;
7993c3a9c75SAmlan Barua     case 2:
800bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8017a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
80226fbe8dcSKarl Rupp 
8039566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
8049566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
8059566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8069566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8079566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8089566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8099566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8109566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
811bd59e6c6SAmlan Barua   #else
812c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
81326fbe8dcSKarl Rupp 
8149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
8159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
8163c3a9c75SAmlan Barua 
8173564f093SHong Zhang       if (dim[1] % 2 == 0) {
8183c3a9c75SAmlan Barua         NM = dim[1] + 2;
8193564f093SHong Zhang       } else {
8203c3a9c75SAmlan Barua         NM = dim[1] + 1;
8213564f093SHong Zhang       }
8223c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
8233c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
8243c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
8253c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
82626fbe8dcSKarl Rupp 
8275b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
8283c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
8293c3a9c75SAmlan Barua         }
8303c3a9c75SAmlan Barua       }
8313c3a9c75SAmlan Barua 
8329566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
8339566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
8343c3a9c75SAmlan Barua 
8359566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8369566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8379566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8389566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8399566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8409566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8419566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8429566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
843bd59e6c6SAmlan Barua   #endif
8449496c9aeSAmlan Barua       break;
8453c3a9c75SAmlan Barua 
8463c3a9c75SAmlan Barua     case 3:
847bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8487a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
84926fbe8dcSKarl Rupp 
8509566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
8519566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
8529566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8539566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8549566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8559566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8569566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8579566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
858bd59e6c6SAmlan Barua   #else
859c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8617a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
86226fbe8dcSKarl Rupp 
8639566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
8649566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
86551d1eed7SAmlan Barua 
866db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
867db4deed7SKarl Rupp       else NM = dim[2] + 1;
86851d1eed7SAmlan Barua 
86951d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
87051d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
87151d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
87251d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
87351d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
87426fbe8dcSKarl Rupp 
87551d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
87651d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
87751d1eed7SAmlan Barua           }
87851d1eed7SAmlan Barua         }
87951d1eed7SAmlan Barua       }
88051d1eed7SAmlan Barua 
8819566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
8829566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
8839566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8849566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8859566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8869566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8879566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8889566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8899566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8909566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
891bd59e6c6SAmlan Barua   #endif
8929496c9aeSAmlan Barua       break;
8933c3a9c75SAmlan Barua 
8943c3a9c75SAmlan Barua     default:
895bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8967a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
89726fbe8dcSKarl Rupp 
8989566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
8999566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
9009566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9019566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9029566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9039566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
906bd59e6c6SAmlan Barua   #else
907c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
91026fbe8dcSKarl Rupp 
911e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
91226fbe8dcSKarl Rupp 
9137a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
91426fbe8dcSKarl Rupp 
915e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
916e5d7f247SAmlan Barua 
917e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
918e5d7f247SAmlan Barua 
9199566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
9209566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
921e5d7f247SAmlan Barua 
922db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
923db4deed7SKarl Rupp       else NM = 1;
924e5d7f247SAmlan Barua 
9256971673cSAmlan Barua       j = low;
9263564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
9276971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
9286971673cSAmlan Barua         indx2[i] = j;
92926fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
9306971673cSAmlan Barua         j++;
9316971673cSAmlan Barua       }
9329566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9339566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9349566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9359566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9369566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9379566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9389566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9399566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9409566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9419566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
942bd59e6c6SAmlan Barua   #endif
9439496c9aeSAmlan Barua       break;
9443c3a9c75SAmlan Barua     }
9450cf2e031SBarry Smith #endif
946e81bb599SAmlan Barua   }
9473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9483c3a9c75SAmlan Barua }
9493c3a9c75SAmlan Barua 
9503564f093SHong Zhang /*@
95111a5261eSBarry Smith    VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9523564f093SHong Zhang 
953c3339decSBarry Smith    Collective
9543564f093SHong Zhang 
9553564f093SHong Zhang     Input Parameters:
95611a5261eSBarry Smith +   A - `MATFFTW` matrix
9573564f093SHong Zhang -   x - FFTW vector
9583564f093SHong Zhang 
9593564f093SHong Zhang    Output Parameters:
9603564f093SHong Zhang .  y - PETSc vector
9613564f093SHong Zhang 
9623564f093SHong Zhang    Level: intermediate
9633564f093SHong Zhang 
96411a5261eSBarry Smith    Note:
96511a5261eSBarry Smith    While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
96611a5261eSBarry Smith    `VecScatterFFTWToPetsc()` removes those extra zeros.
9673564f093SHong Zhang 
968*2ef1f0ffSBarry Smith .seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9693564f093SHong Zhang @*/
970d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971d71ae5a4SJacob Faibussowitsch {
9723c3a9c75SAmlan Barua   PetscFunctionBegin;
973cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9753c3a9c75SAmlan Barua }
97654efbe56SHong Zhang 
977d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978d71ae5a4SJacob Faibussowitsch {
979ce94432eSBarry Smith   MPI_Comm    comm;
9805b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
9819496c9aeSAmlan Barua   PetscInt    low;
9827a21806cSHong Zhang   PetscMPIInt rank, size;
9835b3e41ffSAmlan Barua   VecScatter  vecscat;
9840cf2e031SBarry Smith   IS          list1;
9859496c9aeSAmlan Barua 
9863564f093SHong Zhang   PetscFunctionBegin;
9879566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
9899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
9909566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
9915b3e41ffSAmlan Barua 
992e81bb599SAmlan Barua   if (size == 1) {
9939566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
9949566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
9959566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9969566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9979566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
9989566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
999e81bb599SAmlan Barua 
10000cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10013564f093SHong Zhang   } else {
10020cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
10030cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
10040cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
10050cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
10060cf2e031SBarry Smith     IS        list2;
10070cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
10080cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
10090cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
10100cf2e031SBarry Smith     PetscInt  NM;
10110cf2e031SBarry Smith     ptrdiff_t temp;
10120cf2e031SBarry Smith   #endif
1013e81bb599SAmlan Barua     switch (ndim) {
1014e81bb599SAmlan Barua     case 1:
101564657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10167a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
101726fbe8dcSKarl Rupp 
10189566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
10199566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
10209566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10219566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10229566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10239566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10249566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10259566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
102664657d84SAmlan Barua   #else
10276a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
102864657d84SAmlan Barua   #endif
10295b3e41ffSAmlan Barua       break;
10305b3e41ffSAmlan Barua     case 2:
1031bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10327a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
103326fbe8dcSKarl Rupp 
10349566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
10359566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
10369566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10379566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10389566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10399566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10409566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10419566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1042bd59e6c6SAmlan Barua   #else
10437a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
104426fbe8dcSKarl Rupp 
10459566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
10469566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
10475b3e41ffSAmlan Barua 
1048db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049db4deed7SKarl Rupp       else NM = dim[1] + 1;
10505b3e41ffSAmlan Barua 
10515b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10525b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10535b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10545b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
105526fbe8dcSKarl Rupp 
10565b3e41ffSAmlan Barua           indx1[tempindx] = local_0_start * dim[1] + tempindx;
10575b3e41ffSAmlan Barua           indx2[tempindx] = low + tempindx1;
10585b3e41ffSAmlan Barua         }
10595b3e41ffSAmlan Barua       }
10605b3e41ffSAmlan Barua 
10619566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
10629566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
10635b3e41ffSAmlan Barua 
10649566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10659566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10669566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10679566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10689566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
10709566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
10719566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1072bd59e6c6SAmlan Barua   #endif
10739496c9aeSAmlan Barua       break;
10745b3e41ffSAmlan Barua     case 3:
1075bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10767a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
107726fbe8dcSKarl Rupp 
10789566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
10799566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
10809566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10819566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10829566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10839566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10849566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10859566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1086bd59e6c6SAmlan Barua   #else
10877a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
108826fbe8dcSKarl Rupp 
10899566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
10909566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
109151d1eed7SAmlan Barua 
1092db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093db4deed7SKarl Rupp       else NM = dim[2] + 1;
109451d1eed7SAmlan Barua 
109551d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
109651d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
109751d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
109851d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
109951d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
110026fbe8dcSKarl Rupp 
110151d1eed7SAmlan Barua             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
110251d1eed7SAmlan Barua             indx2[tempindx] = low + tempindx1;
110351d1eed7SAmlan Barua           }
110451d1eed7SAmlan Barua         }
110551d1eed7SAmlan Barua       }
110651d1eed7SAmlan Barua 
11079566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
11089566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
110951d1eed7SAmlan Barua 
11109566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11119566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11129566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11139566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11149566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11159566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11169566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11179566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1118bd59e6c6SAmlan Barua   #endif
11199496c9aeSAmlan Barua       break;
11205b3e41ffSAmlan Barua     default:
1121bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
11227a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
112326fbe8dcSKarl Rupp 
11249566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
11259566063dSJacob Faibussowitsch       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
11269566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11279566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11289566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11299566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11309566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11319566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1132bd59e6c6SAmlan Barua   #else
1133ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
113426fbe8dcSKarl Rupp 
1135ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
113626fbe8dcSKarl Rupp 
1137c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
113826fbe8dcSKarl Rupp 
1139ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1140ba6e06d0SAmlan Barua 
1141ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1142ba6e06d0SAmlan Barua 
11439566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
11449566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1145ba6e06d0SAmlan Barua 
1146db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1147db4deed7SKarl Rupp       else NM = 1;
1148ba6e06d0SAmlan Barua 
1149ba6e06d0SAmlan Barua       j = low;
11503564f093SHong Zhang       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1151ba6e06d0SAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
1152ba6e06d0SAmlan Barua         indx2[i] = j;
11533564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1154ba6e06d0SAmlan Barua         j++;
1155ba6e06d0SAmlan Barua       }
11569566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
11579566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1158ba6e06d0SAmlan Barua 
11599566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11609566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11619566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11629566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11639566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11649566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11659566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11669566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1167bd59e6c6SAmlan Barua   #endif
11689496c9aeSAmlan Barua       break;
11695b3e41ffSAmlan Barua     }
11700cf2e031SBarry Smith #endif
1171e81bb599SAmlan Barua   }
11723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11735b3e41ffSAmlan Barua }
11745b3e41ffSAmlan Barua 
1175d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1176d71ae5a4SJacob Faibussowitsch {
1177ce94432eSBarry Smith   MPI_Comm    comm;
1178b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1179b2b77a04SHong Zhang   Mat_FFTW   *fftw;
11800cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
11815274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
11825274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1183b2b77a04SHong Zhang   PetscBool   flg;
11844f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
118511902ff2SHong Zhang   PetscMPIInt size, rank;
11869496c9aeSAmlan Barua   ptrdiff_t  *pdim;
11879496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11880cf2e031SBarry Smith   PetscInt tot_dim;
11899496c9aeSAmlan Barua #endif
11909496c9aeSAmlan Barua 
1191b2b77a04SHong Zhang   PetscFunctionBegin;
11929566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
11939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
11949566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1195c92418ccSAmlan Barua 
11960cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
11971878d478SAmlan Barua   fftw_mpi_init();
11980cf2e031SBarry Smith #endif
119911902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
120011902ff2SHong Zhang   pdim[0] = dim[0];
12018ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12028ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
12038ca4c034SAmlan Barua #endif
12043564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
12056882372aSHong Zhang     partial_dim *= dim[ctr];
120611902ff2SHong Zhang     pdim[ctr] = dim[ctr];
12078ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1208db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1209db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
12108ca4c034SAmlan Barua #endif
12116882372aSHong Zhang   }
12123c3a9c75SAmlan Barua 
1213b2b77a04SHong Zhang   if (size == 1) {
1214e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12159566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12160cf2e031SBarry Smith     fft->n = fft->N;
1217e81bb599SAmlan Barua #else
12189566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12190cf2e031SBarry Smith     fft->n       = tot_dim;
12200cf2e031SBarry Smith #endif
12210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12220cf2e031SBarry Smith   } else {
12230cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12240cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
12250cf2e031SBarry Smith     ptrdiff_t temp;
12260cf2e031SBarry Smith     PetscInt  N1;
1227e81bb599SAmlan Barua   #endif
1228e81bb599SAmlan Barua 
1229b2b77a04SHong Zhang     switch (ndim) {
1230b2b77a04SHong Zhang     case 1:
12313c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12323c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1233e5d7f247SAmlan Barua   #else
12347a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
12350cf2e031SBarry Smith       fft->n = (PetscInt)local_n0;
12369566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1237e5d7f247SAmlan Barua   #endif
1238b2b77a04SHong Zhang       break;
1239b2b77a04SHong Zhang     case 2:
12405b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12417a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12420cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12439566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12445b3e41ffSAmlan Barua   #else
1245c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
124626fbe8dcSKarl Rupp 
12470cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12489566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12495b3e41ffSAmlan Barua   #endif
1250b2b77a04SHong Zhang       break;
1251b2b77a04SHong Zhang     case 3:
125251d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12537a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
125426fbe8dcSKarl Rupp 
12550cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
12569566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
125751d1eed7SAmlan Barua   #else
1258c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
125926fbe8dcSKarl Rupp 
12600cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
12619566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
126251d1eed7SAmlan Barua   #endif
1263b2b77a04SHong Zhang       break;
1264b2b77a04SHong Zhang     default:
1265b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12667a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
126726fbe8dcSKarl Rupp 
12680cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * partial_dim;
12699566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1270b3a17365SAmlan Barua   #else
1271b3a17365SAmlan Barua       temp = pdim[ndim - 1];
127226fbe8dcSKarl Rupp 
1273b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
127426fbe8dcSKarl Rupp 
1275c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
127626fbe8dcSKarl Rupp 
12770cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
12780cf2e031SBarry Smith       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
127926fbe8dcSKarl Rupp 
1280b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
128126fbe8dcSKarl Rupp 
12829566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1283b3a17365SAmlan Barua   #endif
1284b2b77a04SHong Zhang       break;
1285b2b77a04SHong Zhang     }
12860cf2e031SBarry Smith #endif
1287b2b77a04SHong Zhang   }
12882277172eSMark Adams   free(pdim);
12899566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
12904dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1291b2b77a04SHong Zhang   fft->data = (void *)fftw;
1292b2b77a04SHong Zhang 
12930c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1294e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
129526fbe8dcSKarl Rupp 
12969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
12978c1d5ab3SHong Zhang   if (size == 1) {
1298a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1299a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1300a1b6d50cSHong Zhang #else
13018c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1302a1b6d50cSHong Zhang #endif
13038c1d5ab3SHong Zhang   }
130426fbe8dcSKarl Rupp 
1305b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1306c92418ccSAmlan Barua 
1307f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1308f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1309b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1310b2b77a04SHong Zhang 
1311b2b77a04SHong Zhang   if (size == 1) {
1312b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1313b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13140cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1315b2b77a04SHong Zhang   } else {
1316b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1317b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13180cf2e031SBarry Smith #endif
1319b2b77a04SHong Zhang   }
1320b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1321b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13224a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
132326fbe8dcSKarl Rupp 
13249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13259566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1327b2b77a04SHong Zhang 
1328b2b77a04SHong Zhang   /* get runtime options */
1329d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13309566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13315f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1332d0609cedSBarry Smith   PetscOptionsEnd();
13333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1334b2b77a04SHong Zhang }
1335