1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 110cf2e031SBarry Smith #else 120cf2e031SBarry Smith #include <fftw3.h> 130cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26b2b77a04SHong Zhang PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays becaue fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 380cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 400cf2e031SBarry Smith /* 410cf2e031SBarry Smith MatMult_SeqFFTW performs forward DFT 4297504071SAmlan Barua Input parameter: 433564f093SHong Zhang A - the matrix 4497504071SAmlan Barua x - the vector on which FDFT will be performed 4597504071SAmlan Barua 4697504071SAmlan Barua Output parameter: 4797504071SAmlan Barua y - vector that stores result of FDFT 4897504071SAmlan Barua */ 49*9371c9d4SSatish Balay PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) { 50b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 51b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 52f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 53f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 541acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 55a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 56a1b6d50cSHong Zhang fftw_iodim64 *iodims; 57a1b6d50cSHong Zhang #else 588c1d5ab3SHong Zhang fftw_iodim *iodims; 59a1b6d50cSHong Zhang #endif 601acd80e4SHong Zhang PetscInt i; 611acd80e4SHong Zhang #endif 621acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 63b2b77a04SHong Zhang 64b2b77a04SHong Zhang PetscFunctionBegin; 659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 669566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 676aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 68b2b77a04SHong Zhang switch (ndim) { 69b2b77a04SHong Zhang case 1: 7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 71b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7258a912c5SAmlan Barua #else 7358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7458a912c5SAmlan Barua #endif 75b2b77a04SHong Zhang break; 76b2b77a04SHong Zhang case 2: 7758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 78b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7958a912c5SAmlan Barua #else 8058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8158a912c5SAmlan Barua #endif 82b2b77a04SHong Zhang break; 83b2b77a04SHong Zhang case 3: 8458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 85b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 8658a912c5SAmlan Barua #else 8758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8858a912c5SAmlan Barua #endif 89b2b77a04SHong Zhang break; 90b2b77a04SHong Zhang default: 9158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 92a1b6d50cSHong Zhang iodims = fftw->iodims; 93a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 948c1d5ab3SHong Zhang if (ndim) { 95a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 968c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 978c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 98a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 998c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 1008c1d5ab3SHong Zhang } 1018c1d5ab3SHong Zhang } 102a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 103a1b6d50cSHong Zhang #else 104a1b6d50cSHong Zhang if (ndim) { 105a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 106a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 107a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 108a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 109a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 110a1b6d50cSHong Zhang } 111a1b6d50cSHong Zhang } 112a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 113a1b6d50cSHong Zhang #endif 114a1b6d50cSHong Zhang 11558a912c5SAmlan Barua #else 116a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang } 120f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 121b2b77a04SHong Zhang fftw->foutarray = y_array; 122b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 123b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 124b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 125b2b77a04SHong Zhang } else { /* use existing plan */ 126b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1277e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1297e4bc134SDominic Meiser #else 1307e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1317e4bc134SDominic Meiser #endif 132b2b77a04SHong Zhang } else { 133b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 134b2b77a04SHong Zhang } 135b2b77a04SHong Zhang } 1369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 138b2b77a04SHong Zhang PetscFunctionReturn(0); 139b2b77a04SHong Zhang } 140b2b77a04SHong Zhang 14197504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 14297504071SAmlan Barua Input parameter: 1433564f093SHong Zhang A - the matrix 14497504071SAmlan Barua x - the vector on which BDFT will be performed 14597504071SAmlan Barua 14697504071SAmlan Barua Output parameter: 14797504071SAmlan Barua y - vector that stores result of BDFT 14897504071SAmlan Barua */ 14997504071SAmlan Barua 150*9371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) { 151b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 152b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 153f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 154f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 155b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 1561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 157a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 158a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 159a1b6d50cSHong Zhang #else 160a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 161a1b6d50cSHong Zhang #endif 1621acd80e4SHong Zhang #endif 163b2b77a04SHong Zhang 164b2b77a04SHong Zhang PetscFunctionBegin; 1659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 1669566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1676aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 168b2b77a04SHong Zhang switch (ndim) { 169b2b77a04SHong Zhang case 1: 17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 171b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17258a912c5SAmlan Barua #else 173e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17458a912c5SAmlan Barua #endif 175b2b77a04SHong Zhang break; 176b2b77a04SHong Zhang case 2: 17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 178b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17958a912c5SAmlan Barua #else 180e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18158a912c5SAmlan Barua #endif 182b2b77a04SHong Zhang break; 183b2b77a04SHong Zhang case 3: 18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 185b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 18658a912c5SAmlan Barua #else 187e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18858a912c5SAmlan Barua #endif 189b2b77a04SHong Zhang break; 190b2b77a04SHong Zhang default: 19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 193a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 194a1b6d50cSHong Zhang #else 1958c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196a1b6d50cSHong Zhang #endif 19758a912c5SAmlan Barua #else 198a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 19958a912c5SAmlan Barua #endif 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang } 202f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 203b2b77a04SHong Zhang fftw->boutarray = y_array; 2042f613bf5SBarry Smith fftw_execute(fftw->p_backward); 205b2b77a04SHong Zhang } else { /* use existing plan */ 206b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2077e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 208b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 2097e4bc134SDominic Meiser #else 2107e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 2117e4bc134SDominic Meiser #endif 212b2b77a04SHong Zhang } else { 2132f613bf5SBarry Smith fftw_execute(fftw->p_backward); 214b2b77a04SHong Zhang } 215b2b77a04SHong Zhang } 2169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 218b2b77a04SHong Zhang PetscFunctionReturn(0); 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang 2210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 22297504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22397504071SAmlan Barua Input parameter: 2243564f093SHong Zhang A - the matrix 22597504071SAmlan Barua x - the vector on which FDFT will be performed 22697504071SAmlan Barua 22797504071SAmlan Barua Output parameter: 22897504071SAmlan Barua y - vector that stores result of FDFT 22997504071SAmlan Barua */ 230*9371c9d4SSatish Balay PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) { 231b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 232b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 233f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 234f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 235c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 236ce94432eSBarry Smith MPI_Comm comm; 237b2b77a04SHong Zhang 238b2b77a04SHong Zhang PetscFunctionBegin; 2399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 2419566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2426aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 243b2b77a04SHong Zhang switch (ndim) { 244b2b77a04SHong Zhang case 1: 2453c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 246b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 247ae0a50aaSHong Zhang #else 2484f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2493c3a9c75SAmlan Barua #endif 250b2b77a04SHong Zhang break; 251b2b77a04SHong Zhang case 2: 25297504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 253b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2543c3a9c75SAmlan Barua #else 2553c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2563c3a9c75SAmlan Barua #endif 257b2b77a04SHong Zhang break; 258b2b77a04SHong Zhang case 3: 2593c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 260b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2613c3a9c75SAmlan Barua #else 26251d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2633c3a9c75SAmlan Barua #endif 264b2b77a04SHong Zhang break; 265b2b77a04SHong Zhang default: 2663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 267c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2683c3a9c75SAmlan Barua #else 2693c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2703c3a9c75SAmlan Barua #endif 271b2b77a04SHong Zhang break; 272b2b77a04SHong Zhang } 273f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 274b2b77a04SHong Zhang fftw->foutarray = y_array; 275b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 276b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 277b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 278b2b77a04SHong Zhang } else { /* use existing plan */ 279b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 280b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 281b2b77a04SHong Zhang } else { 282b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 283b2b77a04SHong Zhang } 284b2b77a04SHong Zhang } 2859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 287b2b77a04SHong Zhang PetscFunctionReturn(0); 288b2b77a04SHong Zhang } 289b2b77a04SHong Zhang 2900cf2e031SBarry Smith /* 2910cf2e031SBarry Smith MatMultTranspose_MPIFFTW performs parallel backward DFT 29297504071SAmlan Barua Input parameter: 2933564f093SHong Zhang A - the matrix 29497504071SAmlan Barua x - the vector on which BDFT will be performed 29597504071SAmlan Barua 29697504071SAmlan Barua Output parameter: 29797504071SAmlan Barua y - vector that stores result of BDFT 29897504071SAmlan Barua */ 299*9371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) { 300b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 301b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 302f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 303f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 304c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 305ce94432eSBarry Smith MPI_Comm comm; 306b2b77a04SHong Zhang 307b2b77a04SHong Zhang PetscFunctionBegin; 3089566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 3109566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 3116aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 312b2b77a04SHong Zhang switch (ndim) { 313b2b77a04SHong Zhang case 1: 3143c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 315b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 316ae0a50aaSHong Zhang #else 3174f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 3183c3a9c75SAmlan Barua #endif 319b2b77a04SHong Zhang break; 320b2b77a04SHong Zhang case 2: 32197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 322b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3233c3a9c75SAmlan Barua #else 3243c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3253c3a9c75SAmlan Barua #endif 326b2b77a04SHong Zhang break; 327b2b77a04SHong Zhang case 3: 3283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 329b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3303c3a9c75SAmlan Barua #else 3313c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3323c3a9c75SAmlan Barua #endif 333b2b77a04SHong Zhang break; 334b2b77a04SHong Zhang default: 3353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 336c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3373c3a9c75SAmlan Barua #else 3383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3393c3a9c75SAmlan Barua #endif 340b2b77a04SHong Zhang break; 341b2b77a04SHong Zhang } 342f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 343b2b77a04SHong Zhang fftw->boutarray = y_array; 3442f613bf5SBarry Smith fftw_execute(fftw->p_backward); 345b2b77a04SHong Zhang } else { /* use existing plan */ 346b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 347b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 348b2b77a04SHong Zhang } else { 3492f613bf5SBarry Smith fftw_execute(fftw->p_backward); 350b2b77a04SHong Zhang } 351b2b77a04SHong Zhang } 3529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 354b2b77a04SHong Zhang PetscFunctionReturn(0); 355b2b77a04SHong Zhang } 3560cf2e031SBarry Smith #endif 357b2b77a04SHong Zhang 358*9371c9d4SSatish Balay PetscErrorCode MatDestroy_FFTW(Mat A) { 359b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 360b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 361b2b77a04SHong Zhang 362b2b77a04SHong Zhang PetscFunctionBegin; 363b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 364b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 365*9371c9d4SSatish Balay if (fftw->iodims) { free(fftw->iodims); } 3669566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3679566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3682e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3692e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3702e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3710cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3726ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3730cf2e031SBarry Smith #endif 374b2b77a04SHong Zhang PetscFunctionReturn(0); 375b2b77a04SHong Zhang } 376b2b77a04SHong Zhang 3770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 378c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 379*9371c9d4SSatish Balay PetscErrorCode VecDestroy_MPIFFTW(Vec v) { 380b2b77a04SHong Zhang PetscScalar *array; 381b2b77a04SHong Zhang 382b2b77a04SHong Zhang PetscFunctionBegin; 3839566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3842f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 3869566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 387b2b77a04SHong Zhang PetscFunctionReturn(0); 388b2b77a04SHong Zhang } 3890cf2e031SBarry Smith #endif 390b2b77a04SHong Zhang 3910cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 392*9371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) { 3935b113e21Ss-sajid-ali Mat A; 3945b113e21Ss-sajid-ali 3955b113e21Ss-sajid-ali PetscFunctionBegin; 3969566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 3979566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 3985b113e21Ss-sajid-ali PetscFunctionReturn(0); 3995b113e21Ss-sajid-ali } 4005b113e21Ss-sajid-ali 401*9371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) { 4025b113e21Ss-sajid-ali Mat A; 4035b113e21Ss-sajid-ali 4045b113e21Ss-sajid-ali PetscFunctionBegin; 4059566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 4069566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 4075b113e21Ss-sajid-ali PetscFunctionReturn(0); 4085b113e21Ss-sajid-ali } 4095b113e21Ss-sajid-ali 410*9371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) { 4115b113e21Ss-sajid-ali Mat A; 4125b113e21Ss-sajid-ali 4135b113e21Ss-sajid-ali PetscFunctionBegin; 4149566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 4159566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 4165b113e21Ss-sajid-ali PetscFunctionReturn(0); 4175b113e21Ss-sajid-ali } 4180cf2e031SBarry Smith #endif 4195b113e21Ss-sajid-ali 4204be45526SHong Zhang /*@ 4212a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 4224f7415efSAmlan Barua parallel layout determined by FFTW 4234f7415efSAmlan Barua 4244f7415efSAmlan Barua Collective on Mat 4254f7415efSAmlan Barua 4264f7415efSAmlan Barua Input Parameter: 4273564f093SHong Zhang . A - the matrix 4284f7415efSAmlan Barua 429d8d19677SJose E. Roman Output Parameters: 430cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4316b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 432cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4334f7415efSAmlan Barua 4344f7415efSAmlan Barua Level: advanced 4353564f093SHong Zhang 43697504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 43797504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 43897504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 43997504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 44097504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 44197504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 442e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 443e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 444e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 445e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 446e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 447e0ec536eSAmlan Barua each processor and returns that. 4484f7415efSAmlan Barua 449db781477SPatrick Sanan .seealso: `MatCreateFFT()` 4504be45526SHong Zhang @*/ 451*9371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) { 4524be45526SHong Zhang PetscFunctionBegin; 453cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4544be45526SHong Zhang PetscFunctionReturn(0); 4554be45526SHong Zhang } 4564be45526SHong Zhang 457*9371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) { 4584f7415efSAmlan Barua PetscMPIInt size, rank; 459ce94432eSBarry Smith MPI_Comm comm; 4604f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4614f7415efSAmlan Barua 4624f7415efSAmlan Barua PetscFunctionBegin; 4634f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4644f7415efSAmlan Barua PetscValidType(A, 1); 4659566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4664f7415efSAmlan Barua 4679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4694f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4704f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4719566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 4729566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 4739566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 4744f7415efSAmlan Barua #else 4759566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 4769566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 4779566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 4784f7415efSAmlan Barua #endif 4790cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 48097504071SAmlan Barua } else { /* parallel cases */ 4810cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 4820cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 4834f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 4849496c9aeSAmlan Barua ptrdiff_t local_n1; 4859496c9aeSAmlan Barua fftw_complex *data_fout; 4869496c9aeSAmlan Barua ptrdiff_t local_1_start; 4879496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4889496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 4899496c9aeSAmlan Barua #else 4904f7415efSAmlan Barua double *data_finr, *data_boutr; 4916ccf0b3eSHong Zhang PetscInt n1, N1; 4929496c9aeSAmlan Barua ptrdiff_t temp; 4939496c9aeSAmlan Barua #endif 4949496c9aeSAmlan Barua 4954f7415efSAmlan Barua switch (ndim) { 4964f7415efSAmlan Barua case 1: 49757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4984f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 49957625b84SAmlan Barua #else 50057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 50157625b84SAmlan Barua if (fin) { 50257625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5039566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 5049566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5055b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 50657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 50757625b84SAmlan Barua } 50857625b84SAmlan Barua if (fout) { 50957625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5109566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 5119566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5125b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 51357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51457625b84SAmlan Barua } 51557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 51657625b84SAmlan Barua if (bout) { 51757625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5189566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5199566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5205b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 52157625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 52257625b84SAmlan Barua } 52357625b84SAmlan Barua break; 52457625b84SAmlan Barua #endif 5254f7415efSAmlan Barua case 2: 52697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5274f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 528*9371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 529*9371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5304f7415efSAmlan Barua if (fin) { 5314f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5329566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5339566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5345b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5354f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5364f7415efSAmlan Barua } 53757625b84SAmlan Barua if (fout) { 53857625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5399566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5409566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5415b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 54257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 54357625b84SAmlan Barua } 5445b113e21Ss-sajid-ali if (bout) { 5455b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5469566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5479566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5485b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5495b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5505b113e21Ss-sajid-ali } 5514f7415efSAmlan Barua #else 5524f7415efSAmlan Barua /* Get local size */ 5534f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5544f7415efSAmlan Barua if (fin) { 5554f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5569566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5579566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5585b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5594f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5604f7415efSAmlan Barua } 5614f7415efSAmlan Barua if (fout) { 5624f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5639566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5649566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5655b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5664f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5674f7415efSAmlan Barua } 5684f7415efSAmlan Barua if (bout) { 5694f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5709566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5719566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5725b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5734f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5744f7415efSAmlan Barua } 5754f7415efSAmlan Barua #endif 5764f7415efSAmlan Barua break; 5774f7415efSAmlan Barua case 3: 5784f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5794f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 580*9371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 581*9371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 5824f7415efSAmlan Barua if (fin) { 5834f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5849566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5859566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5865b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5874f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5884f7415efSAmlan Barua } 5895b113e21Ss-sajid-ali if (fout) { 5905b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5919566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 5929566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5935b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5945b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5955b113e21Ss-sajid-ali } 5964f7415efSAmlan Barua if (bout) { 5974f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5989566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5999566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6005b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6014f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6024f7415efSAmlan Barua } 6034f7415efSAmlan Barua #else 6040c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 6050c9b18e4SAmlan Barua if (fin) { 6060c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6079566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6089566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6095b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6100c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6110c9b18e4SAmlan Barua } 6120c9b18e4SAmlan Barua if (fout) { 6130c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6149566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6159566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6165b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6170c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6180c9b18e4SAmlan Barua } 6190c9b18e4SAmlan Barua if (bout) { 6200c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6219566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6229566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6235b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6240c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6250c9b18e4SAmlan Barua } 6264f7415efSAmlan Barua #endif 6274f7415efSAmlan Barua break; 6284f7415efSAmlan Barua default: 6294f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6304f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6314f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6324f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6330cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6344f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6354f7415efSAmlan Barua 6364f7415efSAmlan Barua if (fin) { 6374f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6399566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6405b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6414f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6424f7415efSAmlan Barua } 6435b113e21Ss-sajid-ali if (fout) { 6445b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6459566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6469566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6475b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6485b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6495b113e21Ss-sajid-ali } 6504f7415efSAmlan Barua if (bout) { 6514f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6529566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6539566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6545b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6559496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6564f7415efSAmlan Barua } 6574f7415efSAmlan Barua #else 6580c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6590c9b18e4SAmlan Barua if (fin) { 6600c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6619566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6629566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6635b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6640c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6650c9b18e4SAmlan Barua } 6660c9b18e4SAmlan Barua if (fout) { 6670c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6689566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6699566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6705b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6710c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6720c9b18e4SAmlan Barua } 6730c9b18e4SAmlan Barua if (bout) { 6740c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6759566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6769566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6775b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6780c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6790c9b18e4SAmlan Barua } 6804f7415efSAmlan Barua #endif 6814f7415efSAmlan Barua break; 6824f7415efSAmlan Barua } 683f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 684f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 685f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 686f9d91177SJunchao Zhang */ 687f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 688f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 689f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 6900cf2e031SBarry Smith #endif 6914f7415efSAmlan Barua } 6924f7415efSAmlan Barua PetscFunctionReturn(0); 6934f7415efSAmlan Barua } 6944f7415efSAmlan Barua 6953564f093SHong Zhang /*@ 6963564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 69754efbe56SHong Zhang 6983564f093SHong Zhang Collective on Mat 6993564f093SHong Zhang 7003564f093SHong Zhang Input Parameters: 7013564f093SHong Zhang + A - FFTW matrix 7023564f093SHong Zhang - x - the PETSc vector 7033564f093SHong Zhang 7043564f093SHong Zhang Output Parameters: 7053564f093SHong Zhang . y - the FFTW vector 7063564f093SHong Zhang 707b2b77a04SHong Zhang Options Database Keys: 7083564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 709b2b77a04SHong Zhang 710b2b77a04SHong Zhang Level: intermediate 7113564f093SHong Zhang 71297504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 71397504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 71497504071SAmlan Barua zeros. This routine does that job by scattering operation. 71597504071SAmlan Barua 716db781477SPatrick Sanan .seealso: `VecScatterFFTWToPetsc()` 7173564f093SHong Zhang @*/ 718*9371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) { 7193564f093SHong Zhang PetscFunctionBegin; 720cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7213564f093SHong Zhang PetscFunctionReturn(0); 7223564f093SHong Zhang } 7233c3a9c75SAmlan Barua 724*9371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) { 725ce94432eSBarry Smith MPI_Comm comm; 7263c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7279496c9aeSAmlan Barua PetscInt low; 7287a21806cSHong Zhang PetscMPIInt rank, size; 7297a21806cSHong Zhang PetscInt vsize, vsize1; 7303c3a9c75SAmlan Barua VecScatter vecscat; 7310cf2e031SBarry Smith IS list1; 7323c3a9c75SAmlan Barua 7333564f093SHong Zhang PetscFunctionBegin; 7349566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7359566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7379566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7383c3a9c75SAmlan Barua 7393564f093SHong Zhang if (size == 1) { 7409566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7419566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7429566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7439566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7449566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7459566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7469566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7479566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7480cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7493564f093SHong Zhang } else { 7500cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7510cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7520cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7530cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7540cf2e031SBarry Smith IS list2; 7550cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7560cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7570cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7580cf2e031SBarry Smith PetscInt NM; 7590cf2e031SBarry Smith ptrdiff_t temp; 7600cf2e031SBarry Smith #endif 7610cf2e031SBarry Smith 7623c3a9c75SAmlan Barua switch (ndim) { 7633c3a9c75SAmlan Barua case 1: 76464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7657a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 76626fbe8dcSKarl Rupp 7679566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7689566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7699566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7709566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7719566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7729566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 77564657d84SAmlan Barua #else 776e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 77764657d84SAmlan Barua #endif 7783c3a9c75SAmlan Barua break; 7793c3a9c75SAmlan Barua case 2: 780bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7817a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 78226fbe8dcSKarl Rupp 7839566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 7849566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 7859566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7889566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7899566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 791bd59e6c6SAmlan Barua #else 792c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 79326fbe8dcSKarl Rupp 7949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 7959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 7963c3a9c75SAmlan Barua 7973564f093SHong Zhang if (dim[1] % 2 == 0) { 7983c3a9c75SAmlan Barua NM = dim[1] + 2; 7993564f093SHong Zhang } else { 8003c3a9c75SAmlan Barua NM = dim[1] + 1; 8013564f093SHong Zhang } 8023c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 8033c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 8043c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 8053c3a9c75SAmlan Barua tempindx1 = i * NM + j; 80626fbe8dcSKarl Rupp 8075b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 8083c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 8093c3a9c75SAmlan Barua } 8103c3a9c75SAmlan Barua } 8113c3a9c75SAmlan Barua 8129566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 8139566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 8143c3a9c75SAmlan Barua 8159566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8189566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8219566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8229566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 823bd59e6c6SAmlan Barua #endif 8249496c9aeSAmlan Barua break; 8253c3a9c75SAmlan Barua 8263c3a9c75SAmlan Barua case 3: 827bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8287a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 82926fbe8dcSKarl Rupp 8309566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8319566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8329566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8339566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8349566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8359566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8369566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 838bd59e6c6SAmlan Barua #else 839c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 840f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8417a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 84226fbe8dcSKarl Rupp 8439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 8449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 84551d1eed7SAmlan Barua 846db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 847db4deed7SKarl Rupp else NM = dim[2] + 1; 84851d1eed7SAmlan Barua 84951d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 85051d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 85151d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 85251d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 85351d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 85426fbe8dcSKarl Rupp 85551d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 85651d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 85751d1eed7SAmlan Barua } 85851d1eed7SAmlan Barua } 85951d1eed7SAmlan Barua } 86051d1eed7SAmlan Barua 8619566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8629566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8639566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8649566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8659566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8669566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8699566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8709566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 871bd59e6c6SAmlan Barua #endif 8729496c9aeSAmlan Barua break; 8733c3a9c75SAmlan Barua 8743c3a9c75SAmlan Barua default: 875bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8767a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 87726fbe8dcSKarl Rupp 8789566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 8799566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 8809566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8839566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 886bd59e6c6SAmlan Barua #else 887c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 888f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 889e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 89026fbe8dcSKarl Rupp 891e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 89226fbe8dcSKarl Rupp 8937a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 89426fbe8dcSKarl Rupp 895e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 896e5d7f247SAmlan Barua 897e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 898e5d7f247SAmlan Barua 8999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 9009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 901e5d7f247SAmlan Barua 902db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 903db4deed7SKarl Rupp else NM = 1; 904e5d7f247SAmlan Barua 9056971673cSAmlan Barua j = low; 9063564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 9076971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 9086971673cSAmlan Barua indx2[i] = j; 90926fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 9106971673cSAmlan Barua j++; 9116971673cSAmlan Barua } 9129566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 9139566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 9149566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9159566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9169566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9179566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9189566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9209566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9219566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 922bd59e6c6SAmlan Barua #endif 9239496c9aeSAmlan Barua break; 9243c3a9c75SAmlan Barua } 9250cf2e031SBarry Smith #endif 926e81bb599SAmlan Barua } 9273564f093SHong Zhang PetscFunctionReturn(0); 9283c3a9c75SAmlan Barua } 9293c3a9c75SAmlan Barua 9303564f093SHong Zhang /*@ 9313564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 9323564f093SHong Zhang 9333564f093SHong Zhang Collective on Mat 9343564f093SHong Zhang 9353564f093SHong Zhang Input Parameters: 9363564f093SHong Zhang + A - FFTW matrix 9373564f093SHong Zhang - x - FFTW vector 9383564f093SHong Zhang 9393564f093SHong Zhang Output Parameters: 9403564f093SHong Zhang . y - PETSc vector 9413564f093SHong Zhang 9423564f093SHong Zhang Level: intermediate 9433564f093SHong Zhang 9443564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9453564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9463564f093SHong Zhang 947db781477SPatrick Sanan .seealso: `VecScatterPetscToFFTW()` 9483564f093SHong Zhang @*/ 949*9371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) { 9503c3a9c75SAmlan Barua PetscFunctionBegin; 951cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9523c3a9c75SAmlan Barua PetscFunctionReturn(0); 9533c3a9c75SAmlan Barua } 95454efbe56SHong Zhang 955*9371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) { 956ce94432eSBarry Smith MPI_Comm comm; 9575b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9589496c9aeSAmlan Barua PetscInt low; 9597a21806cSHong Zhang PetscMPIInt rank, size; 9605b3e41ffSAmlan Barua VecScatter vecscat; 9610cf2e031SBarry Smith IS list1; 9629496c9aeSAmlan Barua 9633564f093SHong Zhang PetscFunctionBegin; 9649566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9659566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9679566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9685b3e41ffSAmlan Barua 969e81bb599SAmlan Barua if (size == 1) { 9709566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 9719566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 9729566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9739566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9749566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 976e81bb599SAmlan Barua 9770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9783564f093SHong Zhang } else { 9790cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 9800cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 9810cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 9820cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 9830cf2e031SBarry Smith IS list2; 9840cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 9850cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 9860cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 9870cf2e031SBarry Smith PetscInt NM; 9880cf2e031SBarry Smith ptrdiff_t temp; 9890cf2e031SBarry Smith #endif 990e81bb599SAmlan Barua switch (ndim) { 991e81bb599SAmlan Barua case 1: 99264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9937a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 99426fbe8dcSKarl Rupp 9959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 9969566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 9979566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9999566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 100364657d84SAmlan Barua #else 10046a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 100564657d84SAmlan Barua #endif 10065b3e41ffSAmlan Barua break; 10075b3e41ffSAmlan Barua case 2: 1008bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10097a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 101026fbe8dcSKarl Rupp 10119566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10129566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10139566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10149566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10169566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10179566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10189566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1019bd59e6c6SAmlan Barua #else 10207a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 102126fbe8dcSKarl Rupp 10229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 10239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 10245b3e41ffSAmlan Barua 1025db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1026db4deed7SKarl Rupp else NM = dim[1] + 1; 10275b3e41ffSAmlan Barua 10285b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10295b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10305b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10315b3e41ffSAmlan Barua tempindx1 = i * NM + j; 103226fbe8dcSKarl Rupp 10335b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10345b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10355b3e41ffSAmlan Barua } 10365b3e41ffSAmlan Barua } 10375b3e41ffSAmlan Barua 10389566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10399566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10405b3e41ffSAmlan Barua 10419566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10429566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10439566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10449566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10469566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10479566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10489566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1049bd59e6c6SAmlan Barua #endif 10509496c9aeSAmlan Barua break; 10515b3e41ffSAmlan Barua case 3: 1052bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10537a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 105426fbe8dcSKarl Rupp 10559566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10569566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10579566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10589566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10609566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10629566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1063bd59e6c6SAmlan Barua #else 10647a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 106526fbe8dcSKarl Rupp 10669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 10679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 106851d1eed7SAmlan Barua 1069db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1070db4deed7SKarl Rupp else NM = dim[2] + 1; 107151d1eed7SAmlan Barua 107251d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 107351d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 107451d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 107551d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 107651d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 107726fbe8dcSKarl Rupp 107851d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 107951d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 108051d1eed7SAmlan Barua } 108151d1eed7SAmlan Barua } 108251d1eed7SAmlan Barua } 108351d1eed7SAmlan Barua 10849566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 10859566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 108651d1eed7SAmlan Barua 10879566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10889566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10899566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10909566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10939566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10949566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1095bd59e6c6SAmlan Barua #endif 10969496c9aeSAmlan Barua break; 10975b3e41ffSAmlan Barua default: 1098bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10997a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 110026fbe8dcSKarl Rupp 11019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 11029566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 11039566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 11049566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11059566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11069566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11079566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11089566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1109bd59e6c6SAmlan Barua #else 1110ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 111126fbe8dcSKarl Rupp 1112ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 111326fbe8dcSKarl Rupp 1114c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 111526fbe8dcSKarl Rupp 1116ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1117ba6e06d0SAmlan Barua 1118ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1119ba6e06d0SAmlan Barua 11209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 11219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1122ba6e06d0SAmlan Barua 1123db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1124db4deed7SKarl Rupp else NM = 1; 1125ba6e06d0SAmlan Barua 1126ba6e06d0SAmlan Barua j = low; 11273564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1128ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1129ba6e06d0SAmlan Barua indx2[i] = j; 11303564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1131ba6e06d0SAmlan Barua j++; 1132ba6e06d0SAmlan Barua } 11339566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11349566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1135ba6e06d0SAmlan Barua 11369566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11379566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11389566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11419566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11429566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11439566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1144bd59e6c6SAmlan Barua #endif 11459496c9aeSAmlan Barua break; 11465b3e41ffSAmlan Barua } 11470cf2e031SBarry Smith #endif 1148e81bb599SAmlan Barua } 11493564f093SHong Zhang PetscFunctionReturn(0); 11505b3e41ffSAmlan Barua } 11515b3e41ffSAmlan Barua 11523c3a9c75SAmlan Barua /* 11533564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11543564f093SHong Zhang 11553c3a9c75SAmlan Barua Options Database Keys: 11563c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11573c3a9c75SAmlan Barua 11583c3a9c75SAmlan Barua Level: intermediate 11593c3a9c75SAmlan Barua 11603c3a9c75SAmlan Barua */ 1161*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) { 1162ce94432eSBarry Smith MPI_Comm comm; 1163b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1164b2b77a04SHong Zhang Mat_FFTW *fftw; 11650cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11665274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11675274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1168b2b77a04SHong Zhang PetscBool flg; 11694f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 117011902ff2SHong Zhang PetscMPIInt size, rank; 11719496c9aeSAmlan Barua ptrdiff_t *pdim; 11729496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11730cf2e031SBarry Smith PetscInt tot_dim; 11749496c9aeSAmlan Barua #endif 11759496c9aeSAmlan Barua 1176b2b77a04SHong Zhang PetscFunctionBegin; 11779566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 11789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 11799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1180c92418ccSAmlan Barua 11810cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 11821878d478SAmlan Barua fftw_mpi_init(); 11830cf2e031SBarry Smith #endif 118411902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 118511902ff2SHong Zhang pdim[0] = dim[0]; 11868ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11878ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 11888ca4c034SAmlan Barua #endif 11893564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 11906882372aSHong Zhang partial_dim *= dim[ctr]; 119111902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11928ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1193db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1194db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11958ca4c034SAmlan Barua #endif 11966882372aSHong Zhang } 11973c3a9c75SAmlan Barua 1198b2b77a04SHong Zhang if (size == 1) { 1199e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12009566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 12010cf2e031SBarry Smith fft->n = fft->N; 1202e81bb599SAmlan Barua #else 12039566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 12040cf2e031SBarry Smith fft->n = tot_dim; 12050cf2e031SBarry Smith #endif 12060cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12070cf2e031SBarry Smith } else { 12080cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 12090cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12100cf2e031SBarry Smith ptrdiff_t temp; 12110cf2e031SBarry Smith PetscInt N1; 1212e81bb599SAmlan Barua #endif 1213e81bb599SAmlan Barua 1214b2b77a04SHong Zhang switch (ndim) { 1215b2b77a04SHong Zhang case 1: 12163c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12173c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1218e5d7f247SAmlan Barua #else 12197a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12200cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1222e5d7f247SAmlan Barua #endif 1223b2b77a04SHong Zhang break; 1224b2b77a04SHong Zhang case 2: 12255b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12267a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12270cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12289566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12295b3e41ffSAmlan Barua #else 1230c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 123126fbe8dcSKarl Rupp 12320cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12345b3e41ffSAmlan Barua #endif 1235b2b77a04SHong Zhang break; 1236b2b77a04SHong Zhang case 3: 123751d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12387a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 123926fbe8dcSKarl Rupp 12400cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12419566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 124251d1eed7SAmlan Barua #else 1243c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 124426fbe8dcSKarl Rupp 12450cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12469566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 124751d1eed7SAmlan Barua #endif 1248b2b77a04SHong Zhang break; 1249b2b77a04SHong Zhang default: 1250b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12517a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 125226fbe8dcSKarl Rupp 12530cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1255b3a17365SAmlan Barua #else 1256b3a17365SAmlan Barua temp = pdim[ndim - 1]; 125726fbe8dcSKarl Rupp 1258b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 125926fbe8dcSKarl Rupp 1260c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 126126fbe8dcSKarl Rupp 12620cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12630cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 126426fbe8dcSKarl Rupp 1265b3a17365SAmlan Barua pdim[ndim - 1] = temp; 126626fbe8dcSKarl Rupp 12679566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1268b3a17365SAmlan Barua #endif 1269b2b77a04SHong Zhang break; 1270b2b77a04SHong Zhang } 12710cf2e031SBarry Smith #endif 1272b2b77a04SHong Zhang } 12732277172eSMark Adams free(pdim); 12749566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 12759566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A, &fftw)); 1276b2b77a04SHong Zhang fft->data = (void *)fftw; 1277b2b77a04SHong Zhang 12780c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1279e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 128026fbe8dcSKarl Rupp 12819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 12828c1d5ab3SHong Zhang if (size == 1) { 1283a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1284a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1285a1b6d50cSHong Zhang #else 12868c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1287a1b6d50cSHong Zhang #endif 12888c1d5ab3SHong Zhang } 128926fbe8dcSKarl Rupp 1290b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1291c92418ccSAmlan Barua 1292f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1293f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1294b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1295b2b77a04SHong Zhang 1296b2b77a04SHong Zhang if (size == 1) { 1297b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1298b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 12990cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1300b2b77a04SHong Zhang } else { 1301b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1302b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13030cf2e031SBarry Smith #endif 1304b2b77a04SHong Zhang } 1305b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1306b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13074a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 130826fbe8dcSKarl Rupp 13099566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 13109566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 13119566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1312b2b77a04SHong Zhang 1313b2b77a04SHong Zhang /* get runtime options */ 1314d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13159566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13165f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1317d0609cedSBarry Smith PetscOptionsEnd(); 1318b2b77a04SHong Zhang PetscFunctionReturn(0); 1319b2b77a04SHong Zhang } 1320