1b2b77a04SHong Zhang /* 2b2b77a04SHong Zhang Provides an interface to the FFTW package. 3c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 4b2b77a04SHong Zhang */ 5b2b77a04SHong Zhang 6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 7b2b77a04SHong Zhang EXTERN_C_BEGIN 80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9c6db04a5SJed Brown #include <fftw3-mpi.h> 100cf2e031SBarry Smith #else 110cf2e031SBarry Smith #include <fftw3.h> 120cf2e031SBarry Smith #endif 13b2b77a04SHong Zhang EXTERN_C_END 14b2b77a04SHong Zhang 15b2b77a04SHong Zhang typedef struct { 16b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 17a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 18a1b6d50cSHong Zhang fftw_iodim64 *iodims; 19a1b6d50cSHong Zhang #else 208c1d5ab3SHong Zhang fftw_iodim *iodims; 21a1b6d50cSHong Zhang #endif 22e5d7f247SAmlan Barua PetscInt partial_dim; 23b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 24b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 25e8bdd179SBarry Smith PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */ 26b2b77a04SHong Zhang } Mat_FFTW; 27b2b77a04SHong Zhang 280cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 29b2b77a04SHong Zhang 30*523895eeSPierre Jolivet static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 31d71ae5a4SJacob Faibussowitsch { 32b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 33b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 34f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 35f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 36e8bdd179SBarry Smith Vec xx; 371acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 38a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 39a1b6d50cSHong Zhang fftw_iodim64 *iodims; 40a1b6d50cSHong Zhang #else 418c1d5ab3SHong Zhang fftw_iodim *iodims; 42a1b6d50cSHong Zhang #endif 431acd80e4SHong Zhang PetscInt i; 441acd80e4SHong Zhang #endif 451acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 46b2b77a04SHong Zhang 47b2b77a04SHong Zhang PetscFunctionBegin; 48e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 49e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 50e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 51e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 52e8bdd179SBarry Smith } else { 539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 54e8bdd179SBarry Smith } 559566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 566aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 57b2b77a04SHong Zhang switch (ndim) { 58b2b77a04SHong Zhang case 1: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang case 2: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang case 3: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang default: 8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 81a1b6d50cSHong Zhang iodims = fftw->iodims; 82a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 838c1d5ab3SHong Zhang if (ndim) { 84a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 858c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 868c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 87a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 888c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 898c1d5ab3SHong Zhang } 908c1d5ab3SHong Zhang } 91a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 92a1b6d50cSHong Zhang #else 93a1b6d50cSHong Zhang if (ndim) { 94a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 95a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 96a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 97a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 98a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 99a1b6d50cSHong Zhang } 100a1b6d50cSHong Zhang } 101a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 102a1b6d50cSHong Zhang #endif 103a1b6d50cSHong Zhang 10458a912c5SAmlan Barua #else 105a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 10658a912c5SAmlan Barua #endif 107b2b77a04SHong Zhang break; 108b2b77a04SHong Zhang } 109e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 110e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 111e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 112e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 113e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 114e8bdd179SBarry Smith } else { 115f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 116b2b77a04SHong Zhang fftw->foutarray = y_array; 117e8bdd179SBarry Smith } 118e8bdd179SBarry Smith } 119e8bdd179SBarry Smith 120b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1217e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 122b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1237e4bc134SDominic Meiser #else 1247e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1257e4bc134SDominic Meiser #endif 126b2b77a04SHong Zhang } else { 127b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 128b2b77a04SHong Zhang } 1299566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1309566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 1313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 132b2b77a04SHong Zhang } 133b2b77a04SHong Zhang 134*523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 135d71ae5a4SJacob Faibussowitsch { 136b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 137b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 138f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 139f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 140b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 141e8bdd179SBarry Smith Vec xx; 1421acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 143a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 144a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 145a1b6d50cSHong Zhang #else 146a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 147a1b6d50cSHong Zhang #endif 1481acd80e4SHong Zhang #endif 149b2b77a04SHong Zhang 150b2b77a04SHong Zhang PetscFunctionBegin; 151e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 152e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 153e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 154e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 155e8bdd179SBarry Smith } else { 1569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 157e8bdd179SBarry Smith } 1589566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1596aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 160b2b77a04SHong Zhang switch (ndim) { 161b2b77a04SHong Zhang case 1: 16258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 163b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 16458a912c5SAmlan Barua #else 165e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 16658a912c5SAmlan Barua #endif 167b2b77a04SHong Zhang break; 168b2b77a04SHong Zhang case 2: 16958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 170b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17158a912c5SAmlan Barua #else 172e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17358a912c5SAmlan Barua #endif 174b2b77a04SHong Zhang break; 175b2b77a04SHong Zhang case 3: 17658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 177b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17858a912c5SAmlan Barua #else 179e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18058a912c5SAmlan Barua #endif 181b2b77a04SHong Zhang break; 182b2b77a04SHong Zhang default: 18358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 184a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 185a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 186a1b6d50cSHong Zhang #else 1878c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 188a1b6d50cSHong Zhang #endif 18958a912c5SAmlan Barua #else 190a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 19158a912c5SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 195e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 196e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 197e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 198e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 199e8bdd179SBarry Smith } else { 200f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 201b2b77a04SHong Zhang fftw->boutarray = y_array; 202e8bdd179SBarry Smith } 203e8bdd179SBarry Smith } 204b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2057e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 206b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 2077e4bc134SDominic Meiser #else 2087e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 2097e4bc134SDominic Meiser #endif 210b2b77a04SHong Zhang } else { 2112f613bf5SBarry Smith fftw_execute(fftw->p_backward); 212b2b77a04SHong Zhang } 2139566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2149566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 216b2b77a04SHong Zhang } 217b2b77a04SHong Zhang 2180cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 219*523895eeSPierre Jolivet static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 220d71ae5a4SJacob Faibussowitsch { 221b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 222b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 223f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 224f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 225c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 226ce94432eSBarry Smith MPI_Comm comm; 227e8bdd179SBarry Smith Vec xx; 228b2b77a04SHong Zhang 229b2b77a04SHong Zhang PetscFunctionBegin; 2309566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 231e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 232e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 233e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 234e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 235e8bdd179SBarry Smith } else { 2369566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 237e8bdd179SBarry Smith } 2389566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2396aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 240b2b77a04SHong Zhang switch (ndim) { 241b2b77a04SHong Zhang case 1: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 244ae0a50aaSHong Zhang #else 2454f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang case 2: 24997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 250b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang case 3: 2563c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 257b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2583c3a9c75SAmlan Barua #else 25951d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2603c3a9c75SAmlan Barua #endif 261b2b77a04SHong Zhang break; 262b2b77a04SHong Zhang default: 2633c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 264c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2653c3a9c75SAmlan Barua #else 2663c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2673c3a9c75SAmlan Barua #endif 268b2b77a04SHong Zhang break; 269b2b77a04SHong Zhang } 270e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 271e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 272e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 273e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 274e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 275e8bdd179SBarry Smith } else { 276f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 277b2b77a04SHong Zhang fftw->foutarray = y_array; 278e8bdd179SBarry Smith } 279e8bdd179SBarry Smith } 280b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 281b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 282b2b77a04SHong Zhang } else { 283b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 284b2b77a04SHong Zhang } 2859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 2873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 288b2b77a04SHong Zhang } 289b2b77a04SHong Zhang 290*523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 291d71ae5a4SJacob Faibussowitsch { 292b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 293b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 294f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 295f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 296c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 297ce94432eSBarry Smith MPI_Comm comm; 298e8bdd179SBarry Smith Vec xx; 299b2b77a04SHong Zhang 300b2b77a04SHong Zhang PetscFunctionBegin; 3019566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 302e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 303e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 304e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx)); 305e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array)); 306e8bdd179SBarry Smith } else { 3079566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 308e8bdd179SBarry Smith } 3099566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 3106aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 311b2b77a04SHong Zhang switch (ndim) { 312b2b77a04SHong Zhang case 1: 3133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 314b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 315ae0a50aaSHong Zhang #else 3164f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 3173c3a9c75SAmlan Barua #endif 318b2b77a04SHong Zhang break; 319b2b77a04SHong Zhang case 2: 32097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 321b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3223c3a9c75SAmlan Barua #else 3233c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3243c3a9c75SAmlan Barua #endif 325b2b77a04SHong Zhang break; 326b2b77a04SHong Zhang case 3: 3273c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 328b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3293c3a9c75SAmlan Barua #else 3303c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3313c3a9c75SAmlan Barua #endif 332b2b77a04SHong Zhang break; 333b2b77a04SHong Zhang default: 3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 335c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3363c3a9c75SAmlan Barua #else 3373c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3383c3a9c75SAmlan Barua #endif 339b2b77a04SHong Zhang break; 340b2b77a04SHong Zhang } 341e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) { 342e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 343e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array)); 344e8bdd179SBarry Smith PetscCall(VecDestroy(&xx)); 345e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array)); 346e8bdd179SBarry Smith } else { 347f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 348b2b77a04SHong Zhang fftw->boutarray = y_array; 349e8bdd179SBarry Smith } 350e8bdd179SBarry Smith } 351b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 352b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 353b2b77a04SHong Zhang } else { 3542f613bf5SBarry Smith fftw_execute(fftw->p_backward); 355b2b77a04SHong Zhang } 3569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 3583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 359b2b77a04SHong Zhang } 3600cf2e031SBarry Smith #endif 361b2b77a04SHong Zhang 362*523895eeSPierre Jolivet static PetscErrorCode MatDestroy_FFTW(Mat A) 363d71ae5a4SJacob Faibussowitsch { 364b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 365b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 366b2b77a04SHong Zhang 367b2b77a04SHong Zhang PetscFunctionBegin; 368b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 369b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 370ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims); 3719566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3729566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3732e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3742e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3776ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3780cf2e031SBarry Smith #endif 3793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 380b2b77a04SHong Zhang } 381b2b77a04SHong Zhang 3820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 383c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384*523895eeSPierre Jolivet static PetscErrorCode VecDestroy_MPIFFTW(Vec v) 385d71ae5a4SJacob Faibussowitsch { 386b2b77a04SHong Zhang PetscScalar *array; 387b2b77a04SHong Zhang 388b2b77a04SHong Zhang PetscFunctionBegin; 3899566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3902f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3919566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 3929566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 394b2b77a04SHong Zhang } 3950cf2e031SBarry Smith #endif 396b2b77a04SHong Zhang 3970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 398d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 399d71ae5a4SJacob Faibussowitsch { 4005b113e21Ss-sajid-ali Mat A; 4015b113e21Ss-sajid-ali 4025b113e21Ss-sajid-ali PetscFunctionBegin; 4039566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 4049566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 4053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4065b113e21Ss-sajid-ali } 4075b113e21Ss-sajid-ali 408d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 409d71ae5a4SJacob Faibussowitsch { 4105b113e21Ss-sajid-ali Mat A; 4115b113e21Ss-sajid-ali 4125b113e21Ss-sajid-ali PetscFunctionBegin; 4139566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 4149566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 4153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4165b113e21Ss-sajid-ali } 4175b113e21Ss-sajid-ali 418d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 419d71ae5a4SJacob Faibussowitsch { 4205b113e21Ss-sajid-ali Mat A; 4215b113e21Ss-sajid-ali 4225b113e21Ss-sajid-ali PetscFunctionBegin; 4239566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 4249566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 4253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4265b113e21Ss-sajid-ali } 4270cf2e031SBarry Smith #endif 4285b113e21Ss-sajid-ali 4294be45526SHong Zhang /*@ 4302a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 43111a5261eSBarry Smith parallel layout determined by `MATFFTW` 4324f7415efSAmlan Barua 433c3339decSBarry Smith Collective 4344f7415efSAmlan Barua 4354f7415efSAmlan Barua Input Parameter: 4363564f093SHong Zhang . A - the matrix 4374f7415efSAmlan Barua 438d8d19677SJose E. Roman Output Parameters: 439cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4406b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 441cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4424f7415efSAmlan Barua 4432ef1f0ffSBarry Smith Options Database Key: 4442ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags 4452ef1f0ffSBarry Smith 4464f7415efSAmlan Barua Level: advanced 4473564f093SHong Zhang 44811a5261eSBarry Smith Notes: 44911a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input 45097504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 45197504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 45211a5261eSBarry Smith 45311a5261eSBarry Smith We need to provide enough space while doing parallel real transform. 45497504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 45597504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 456e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 457e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 458e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 45911a5261eSBarry Smith 460e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 461e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 462e0ec536eSAmlan Barua each processor and returns that. 4634f7415efSAmlan Barua 464fe59aa6dSJacob Faibussowitsch Developer Notes: 46511a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 46611a5261eSBarry Smith 4671cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 4684be45526SHong Zhang @*/ 469d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 470d71ae5a4SJacob Faibussowitsch { 4714be45526SHong Zhang PetscFunctionBegin; 472cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4744be45526SHong Zhang } 4754be45526SHong Zhang 476d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 477d71ae5a4SJacob Faibussowitsch { 4784f7415efSAmlan Barua PetscMPIInt size, rank; 479ce94432eSBarry Smith MPI_Comm comm; 4804f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4814f7415efSAmlan Barua 4824f7415efSAmlan Barua PetscFunctionBegin; 4834f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4844f7415efSAmlan Barua PetscValidType(A, 1); 4859566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4864f7415efSAmlan Barua 4879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4894f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4904f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4919566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 4929566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 4939566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 4944f7415efSAmlan Barua #else 4959566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 4969566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 4979566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 4984f7415efSAmlan Barua #endif 4990cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 50097504071SAmlan Barua } else { /* parallel cases */ 5010cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 5020cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 5034f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 5049496c9aeSAmlan Barua ptrdiff_t local_n1; 5059496c9aeSAmlan Barua fftw_complex *data_fout; 5069496c9aeSAmlan Barua ptrdiff_t local_1_start; 5079496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 5089496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 5099496c9aeSAmlan Barua #else 5104f7415efSAmlan Barua double *data_finr, *data_boutr; 5116ccf0b3eSHong Zhang PetscInt n1, N1; 5129496c9aeSAmlan Barua ptrdiff_t temp; 5139496c9aeSAmlan Barua #endif 5149496c9aeSAmlan Barua 5154f7415efSAmlan Barua switch (ndim) { 5164f7415efSAmlan Barua case 1: 51757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5184f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 51957625b84SAmlan Barua #else 52057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 52157625b84SAmlan Barua if (fin) { 52257625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5239566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 5249566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5255b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 52657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 52757625b84SAmlan Barua } 52857625b84SAmlan Barua if (fout) { 52957625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5309566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 5319566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5325b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 53357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53457625b84SAmlan Barua } 53557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 53657625b84SAmlan Barua if (bout) { 53757625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5399566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5405b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 54157625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 54257625b84SAmlan Barua } 54357625b84SAmlan Barua break; 54457625b84SAmlan Barua #endif 5454f7415efSAmlan Barua case 2: 54697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5474f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5489371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 5499371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5504f7415efSAmlan Barua if (fin) { 5514f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5529566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5539566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5545b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5554f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5564f7415efSAmlan Barua } 55757625b84SAmlan Barua if (fout) { 55857625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5599566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5609566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5615b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 56257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56357625b84SAmlan Barua } 5645b113e21Ss-sajid-ali if (bout) { 5655b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5669566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5679566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5685b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5695b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5705b113e21Ss-sajid-ali } 5714f7415efSAmlan Barua #else 5724f7415efSAmlan Barua /* Get local size */ 5734f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5744f7415efSAmlan Barua if (fin) { 5754f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5769566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5779566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5785b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5794f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5804f7415efSAmlan Barua } 5814f7415efSAmlan Barua if (fout) { 5824f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5849566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5855b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5864f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5874f7415efSAmlan Barua } 5884f7415efSAmlan Barua if (bout) { 5894f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5909566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5919566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5925b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5934f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5944f7415efSAmlan Barua } 5954f7415efSAmlan Barua #endif 5964f7415efSAmlan Barua break; 5974f7415efSAmlan Barua case 3: 5984f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5994f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6009371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 6019371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 6024f7415efSAmlan Barua if (fin) { 6034f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6049566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 6059566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6065b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6074f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6084f7415efSAmlan Barua } 6095b113e21Ss-sajid-ali if (fout) { 6105b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6119566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 6129566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6135b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6145b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6155b113e21Ss-sajid-ali } 6164f7415efSAmlan Barua if (bout) { 6174f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6189566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 6199566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6205b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6214f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6224f7415efSAmlan Barua } 6234f7415efSAmlan Barua #else 6240c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 6250c9b18e4SAmlan Barua if (fin) { 6260c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6279566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6289566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6295b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6300c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6310c9b18e4SAmlan Barua } 6320c9b18e4SAmlan Barua if (fout) { 6330c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6349566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6359566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6365b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6370c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6380c9b18e4SAmlan Barua } 6390c9b18e4SAmlan Barua if (bout) { 6400c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6419566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6429566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6435b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6440c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6450c9b18e4SAmlan Barua } 6464f7415efSAmlan Barua #endif 6474f7415efSAmlan Barua break; 6484f7415efSAmlan Barua default: 6494f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6504f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6514f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6524f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 653f4f49eeaSPierre Jolivet N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6544f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6554f7415efSAmlan Barua 6564f7415efSAmlan Barua if (fin) { 6574f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6589566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6599566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6605b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6614f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6624f7415efSAmlan Barua } 6635b113e21Ss-sajid-ali if (fout) { 6645b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6659566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6669566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6675b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6685b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6695b113e21Ss-sajid-ali } 6704f7415efSAmlan Barua if (bout) { 6714f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6729566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6739566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6745b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6759496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6764f7415efSAmlan Barua } 6774f7415efSAmlan Barua #else 6780c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6790c9b18e4SAmlan Barua if (fin) { 6800c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6819566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6829566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6835b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6840c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6850c9b18e4SAmlan Barua } 6860c9b18e4SAmlan Barua if (fout) { 6870c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6889566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6899566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6905b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6910c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6920c9b18e4SAmlan Barua } 6930c9b18e4SAmlan Barua if (bout) { 6940c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6959566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6969566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6975b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6980c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6990c9b18e4SAmlan Barua } 7004f7415efSAmlan Barua #endif 7014f7415efSAmlan Barua break; 7024f7415efSAmlan Barua } 703f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 704f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 705da81f932SPierre Jolivet memory leaks. We void these pointers here to avoid accident uses. 706f9d91177SJunchao Zhang */ 707f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 708f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 709f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 7100cf2e031SBarry Smith #endif 7114f7415efSAmlan Barua } 7123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7134f7415efSAmlan Barua } 7144f7415efSAmlan Barua 7153564f093SHong Zhang /*@ 71611a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 71754efbe56SHong Zhang 718c3339decSBarry Smith Collective 7193564f093SHong Zhang 7203564f093SHong Zhang Input Parameters: 7213564f093SHong Zhang + A - FFTW matrix 7223564f093SHong Zhang - x - the PETSc vector 7233564f093SHong Zhang 7242fe279fdSBarry Smith Output Parameter: 7253564f093SHong Zhang . y - the FFTW vector 7263564f093SHong Zhang 727b2b77a04SHong Zhang Level: intermediate 7283564f093SHong Zhang 72911a5261eSBarry Smith Note: 73011a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 73197504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 73297504071SAmlan Barua zeros. This routine does that job by scattering operation. 73397504071SAmlan Barua 7341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 7353564f093SHong Zhang @*/ 736d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 737d71ae5a4SJacob Faibussowitsch { 7383564f093SHong Zhang PetscFunctionBegin; 739cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7413564f093SHong Zhang } 7423c3a9c75SAmlan Barua 74366976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 744d71ae5a4SJacob Faibussowitsch { 745ce94432eSBarry Smith MPI_Comm comm; 7463c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7479496c9aeSAmlan Barua PetscInt low; 7487a21806cSHong Zhang PetscMPIInt rank, size; 7497a21806cSHong Zhang PetscInt vsize, vsize1; 7503c3a9c75SAmlan Barua VecScatter vecscat; 7510cf2e031SBarry Smith IS list1; 7523c3a9c75SAmlan Barua 7533564f093SHong Zhang PetscFunctionBegin; 7549566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7559566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7569566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7579566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7583c3a9c75SAmlan Barua 7593564f093SHong Zhang if (size == 1) { 7609566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7619566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7629566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7639566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7649566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7659566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7669566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7693564f093SHong Zhang } else { 7700cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7710cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7720cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7730cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7740cf2e031SBarry Smith IS list2; 7750cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7760cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7770cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7780cf2e031SBarry Smith PetscInt NM; 7790cf2e031SBarry Smith ptrdiff_t temp; 7800cf2e031SBarry Smith #endif 7810cf2e031SBarry Smith 7823c3a9c75SAmlan Barua switch (ndim) { 7833c3a9c75SAmlan Barua case 1: 78464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7857a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 78626fbe8dcSKarl Rupp 7879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7899566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7929566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 79564657d84SAmlan Barua #else 796e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 79764657d84SAmlan Barua #endif 7983c3a9c75SAmlan Barua break; 7993c3a9c75SAmlan Barua case 2: 800bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8017a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 80226fbe8dcSKarl Rupp 8039566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 8049566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 8059566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8069566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8089566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8109566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 811bd59e6c6SAmlan Barua #else 812c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 81326fbe8dcSKarl Rupp 81457508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 81557508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 8163c3a9c75SAmlan Barua 8173564f093SHong Zhang if (dim[1] % 2 == 0) { 8183c3a9c75SAmlan Barua NM = dim[1] + 2; 8193564f093SHong Zhang } else { 8203c3a9c75SAmlan Barua NM = dim[1] + 1; 8213564f093SHong Zhang } 8223c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 8233c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 8243c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 8253c3a9c75SAmlan Barua tempindx1 = i * NM + j; 82626fbe8dcSKarl Rupp 8275b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 8283c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 8293c3a9c75SAmlan Barua } 8303c3a9c75SAmlan Barua } 8313c3a9c75SAmlan Barua 8329566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 8339566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 8343c3a9c75SAmlan Barua 8359566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8369566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8379566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8389566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8419566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8429566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 843bd59e6c6SAmlan Barua #endif 8449496c9aeSAmlan Barua break; 8453c3a9c75SAmlan Barua 8463c3a9c75SAmlan Barua case 3: 847bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8487a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 84926fbe8dcSKarl Rupp 8509566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8519566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8529566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8539566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8549566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8559566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8569566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8579566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 858bd59e6c6SAmlan Barua #else 859c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 860f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8617a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 86226fbe8dcSKarl Rupp 86357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 86457508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 86551d1eed7SAmlan Barua 866db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 867db4deed7SKarl Rupp else NM = dim[2] + 1; 86851d1eed7SAmlan Barua 86951d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 87051d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 87151d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 87251d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 87351d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 87426fbe8dcSKarl Rupp 87551d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 87651d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 87751d1eed7SAmlan Barua } 87851d1eed7SAmlan Barua } 87951d1eed7SAmlan Barua } 88051d1eed7SAmlan Barua 8819566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8829566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8839566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8849566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8859566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8869566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8879566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8899566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8909566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 891bd59e6c6SAmlan Barua #endif 8929496c9aeSAmlan Barua break; 8933c3a9c75SAmlan Barua 8943c3a9c75SAmlan Barua default: 895bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8967a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 89726fbe8dcSKarl Rupp 8989566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 8999566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 9009566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9019566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9029566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9039566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 906bd59e6c6SAmlan Barua #else 907c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 908f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 909e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 91026fbe8dcSKarl Rupp 911e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 91226fbe8dcSKarl Rupp 9137a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 91426fbe8dcSKarl Rupp 915e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 916e5d7f247SAmlan Barua 917e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 918e5d7f247SAmlan Barua 91957508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 92057508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 921e5d7f247SAmlan Barua 922db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 923db4deed7SKarl Rupp else NM = 1; 924e5d7f247SAmlan Barua 9256971673cSAmlan Barua j = low; 92657508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 9276971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 9286971673cSAmlan Barua indx2[i] = j; 92926fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 9306971673cSAmlan Barua j++; 9316971673cSAmlan Barua } 9329566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 9339566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 9349566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9359566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9369566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9379566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9389566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9399566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9409566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9419566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 942bd59e6c6SAmlan Barua #endif 9439496c9aeSAmlan Barua break; 9443c3a9c75SAmlan Barua } 9450cf2e031SBarry Smith #endif 946e81bb599SAmlan Barua } 9473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9483c3a9c75SAmlan Barua } 9493c3a9c75SAmlan Barua 9503564f093SHong Zhang /*@ 95111a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 9523564f093SHong Zhang 953c3339decSBarry Smith Collective 9543564f093SHong Zhang 9553564f093SHong Zhang Input Parameters: 95611a5261eSBarry Smith + A - `MATFFTW` matrix 9573564f093SHong Zhang - x - FFTW vector 9583564f093SHong Zhang 9592fe279fdSBarry Smith Output Parameter: 9603564f093SHong Zhang . y - PETSc vector 9613564f093SHong Zhang 9623564f093SHong Zhang Level: intermediate 9633564f093SHong Zhang 96411a5261eSBarry Smith Note: 96511a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 96611a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros. 9673564f093SHong Zhang 9681cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 9693564f093SHong Zhang @*/ 970d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 971d71ae5a4SJacob Faibussowitsch { 9723c3a9c75SAmlan Barua PetscFunctionBegin; 973cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 9753c3a9c75SAmlan Barua } 97654efbe56SHong Zhang 97766976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 978d71ae5a4SJacob Faibussowitsch { 979ce94432eSBarry Smith MPI_Comm comm; 9805b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9819496c9aeSAmlan Barua PetscInt low; 9827a21806cSHong Zhang PetscMPIInt rank, size; 9835b3e41ffSAmlan Barua VecScatter vecscat; 9840cf2e031SBarry Smith IS list1; 9859496c9aeSAmlan Barua 9863564f093SHong Zhang PetscFunctionBegin; 9879566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9909566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9915b3e41ffSAmlan Barua 992e81bb599SAmlan Barua if (size == 1) { 9939566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 9949566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 9959566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9969566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9979566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9989566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 999e81bb599SAmlan Barua 10000cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10013564f093SHong Zhang } else { 10020cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 10030cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 10040cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 10050cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 10060cf2e031SBarry Smith IS list2; 10070cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 10080cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 10090cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 10100cf2e031SBarry Smith PetscInt NM; 10110cf2e031SBarry Smith ptrdiff_t temp; 10120cf2e031SBarry Smith #endif 1013e81bb599SAmlan Barua switch (ndim) { 1014e81bb599SAmlan Barua case 1: 101564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10167a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 101726fbe8dcSKarl Rupp 10189566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 10199566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 10209566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10219566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10229566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10239566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 102664657d84SAmlan Barua #else 10276a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 102864657d84SAmlan Barua #endif 10295b3e41ffSAmlan Barua break; 10305b3e41ffSAmlan Barua case 2: 1031bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10327a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 103326fbe8dcSKarl Rupp 10349566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10359566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10369566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10379566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10389566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10419566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1042bd59e6c6SAmlan Barua #else 10437a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 104426fbe8dcSKarl Rupp 104557508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 104657508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 10475b3e41ffSAmlan Barua 1048db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1049db4deed7SKarl Rupp else NM = dim[1] + 1; 10505b3e41ffSAmlan Barua 10515b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10525b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10535b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10545b3e41ffSAmlan Barua tempindx1 = i * NM + j; 105526fbe8dcSKarl Rupp 10565b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10575b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10585b3e41ffSAmlan Barua } 10595b3e41ffSAmlan Barua } 10605b3e41ffSAmlan Barua 10619566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10629566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10635b3e41ffSAmlan Barua 10649566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10659566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10669566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10679566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10709566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10719566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1072bd59e6c6SAmlan Barua #endif 10739496c9aeSAmlan Barua break; 10745b3e41ffSAmlan Barua case 3: 1075bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10767a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 107726fbe8dcSKarl Rupp 10789566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10799566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10809566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10839566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1086bd59e6c6SAmlan Barua #else 10877a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 108826fbe8dcSKarl Rupp 108957508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 109057508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 109151d1eed7SAmlan Barua 1092db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1093db4deed7SKarl Rupp else NM = dim[2] + 1; 109451d1eed7SAmlan Barua 109551d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 109651d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 109751d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 109851d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 109951d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 110026fbe8dcSKarl Rupp 110151d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 110251d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 110351d1eed7SAmlan Barua } 110451d1eed7SAmlan Barua } 110551d1eed7SAmlan Barua } 110651d1eed7SAmlan Barua 11079566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 11089566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 110951d1eed7SAmlan Barua 11109566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11119566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11129566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11159566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11169566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11179566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1118bd59e6c6SAmlan Barua #endif 11199496c9aeSAmlan Barua break; 11205b3e41ffSAmlan Barua default: 1121bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11227a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 112326fbe8dcSKarl Rupp 11249566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 11259566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 11269566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 11279566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11289566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11299566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11319566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1132bd59e6c6SAmlan Barua #else 1133ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 113426fbe8dcSKarl Rupp 1135ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 113626fbe8dcSKarl Rupp 1137c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 113826fbe8dcSKarl Rupp 1139ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1140ba6e06d0SAmlan Barua 1141ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1142ba6e06d0SAmlan Barua 114357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 114457508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 1145ba6e06d0SAmlan Barua 1146db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1147db4deed7SKarl Rupp else NM = 1; 1148ba6e06d0SAmlan Barua 1149ba6e06d0SAmlan Barua j = low; 115057508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 1151ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1152ba6e06d0SAmlan Barua indx2[i] = j; 11533564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1154ba6e06d0SAmlan Barua j++; 1155ba6e06d0SAmlan Barua } 11569566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11579566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1158ba6e06d0SAmlan Barua 11599566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11609566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11619566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11629566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11639566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11659566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11669566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1167bd59e6c6SAmlan Barua #endif 11689496c9aeSAmlan Barua break; 11695b3e41ffSAmlan Barua } 11700cf2e031SBarry Smith #endif 1171e81bb599SAmlan Barua } 11723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 11735b3e41ffSAmlan Barua } 11745b3e41ffSAmlan Barua 1175350f1385SJose E. Roman /*MC 1176350f1385SJose E. Roman MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package. 1177350f1385SJose E. Roman 1178350f1385SJose E. Roman Level: intermediate 1179350f1385SJose E. Roman 11801cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()` 1181350f1385SJose E. Roman M*/ 1182d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1183d71ae5a4SJacob Faibussowitsch { 1184ce94432eSBarry Smith MPI_Comm comm; 1185b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1186b2b77a04SHong Zhang Mat_FFTW *fftw; 11870cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11885274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11895274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1190b2b77a04SHong Zhang PetscBool flg; 11914f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 119211902ff2SHong Zhang PetscMPIInt size, rank; 11939496c9aeSAmlan Barua ptrdiff_t *pdim; 11949496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11950cf2e031SBarry Smith PetscInt tot_dim; 11969496c9aeSAmlan Barua #endif 11979496c9aeSAmlan Barua 1198b2b77a04SHong Zhang PetscFunctionBegin; 11999566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 12009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 12019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1202c92418ccSAmlan Barua 12030cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12041878d478SAmlan Barua fftw_mpi_init(); 12050cf2e031SBarry Smith #endif 120611902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 120711902ff2SHong Zhang pdim[0] = dim[0]; 12088ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12098ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 12108ca4c034SAmlan Barua #endif 12113564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 12126882372aSHong Zhang partial_dim *= dim[ctr]; 121311902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12148ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1215db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1216db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12178ca4c034SAmlan Barua #endif 12186882372aSHong Zhang } 12193c3a9c75SAmlan Barua 1220b2b77a04SHong Zhang if (size == 1) { 1221e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12229566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 12230cf2e031SBarry Smith fft->n = fft->N; 1224e81bb599SAmlan Barua #else 12259566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 12260cf2e031SBarry Smith fft->n = tot_dim; 12270cf2e031SBarry Smith #endif 12280cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12290cf2e031SBarry Smith } else { 12300cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 12310cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12320cf2e031SBarry Smith ptrdiff_t temp; 12330cf2e031SBarry Smith PetscInt N1; 1234e81bb599SAmlan Barua #endif 1235e81bb599SAmlan Barua 1236b2b77a04SHong Zhang switch (ndim) { 1237b2b77a04SHong Zhang case 1: 12383c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12393c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1240e5d7f247SAmlan Barua #else 12417a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12420cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12439566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1244e5d7f247SAmlan Barua #endif 1245b2b77a04SHong Zhang break; 1246b2b77a04SHong Zhang case 2: 12475b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12487a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12490cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12515b3e41ffSAmlan Barua #else 1252c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 125326fbe8dcSKarl Rupp 12540cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12559566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12565b3e41ffSAmlan Barua #endif 1257b2b77a04SHong Zhang break; 1258b2b77a04SHong Zhang case 3: 125951d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12607a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 126126fbe8dcSKarl Rupp 12620cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12639566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 126451d1eed7SAmlan Barua #else 1265c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 126626fbe8dcSKarl Rupp 12670cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 126951d1eed7SAmlan Barua #endif 1270b2b77a04SHong Zhang break; 1271b2b77a04SHong Zhang default: 1272b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12737a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 127426fbe8dcSKarl Rupp 12750cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12769566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1277b3a17365SAmlan Barua #else 1278b3a17365SAmlan Barua temp = pdim[ndim - 1]; 127926fbe8dcSKarl Rupp 1280b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 128126fbe8dcSKarl Rupp 1282c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 128326fbe8dcSKarl Rupp 12840cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12850cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 128626fbe8dcSKarl Rupp 1287b3a17365SAmlan Barua pdim[ndim - 1] = temp; 128826fbe8dcSKarl Rupp 12899566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1290b3a17365SAmlan Barua #endif 1291b2b77a04SHong Zhang break; 1292b2b77a04SHong Zhang } 12930cf2e031SBarry Smith #endif 1294b2b77a04SHong Zhang } 12952277172eSMark Adams free(pdim); 12969566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 12974dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw)); 1298b2b77a04SHong Zhang fft->data = (void *)fftw; 1299b2b77a04SHong Zhang 13000c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1301e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 130226fbe8dcSKarl Rupp 1303f4f49eeaSPierre Jolivet PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw)); 13048c1d5ab3SHong Zhang if (size == 1) { 1305a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1306a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1307a1b6d50cSHong Zhang #else 13088c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1309a1b6d50cSHong Zhang #endif 13108c1d5ab3SHong Zhang } 131126fbe8dcSKarl Rupp 1312b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1313c92418ccSAmlan Barua 1314f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1315f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1316b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1317b2b77a04SHong Zhang 1318b2b77a04SHong Zhang if (size == 1) { 1319b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1320b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 13210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1322b2b77a04SHong Zhang } else { 1323b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1324b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13250cf2e031SBarry Smith #endif 1326b2b77a04SHong Zhang } 1327b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1328b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13294a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 133026fbe8dcSKarl Rupp 13319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 13329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 13339566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1334b2b77a04SHong Zhang 1335b2b77a04SHong Zhang /* get runtime options */ 1336d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13379566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13385f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1339d0609cedSBarry Smith PetscOptionsEnd(); 13403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1341b2b77a04SHong Zhang } 1342