1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 110cf2e031SBarry Smith #else 120cf2e031SBarry Smith #include <fftw3.h> 130cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26b2b77a04SHong Zhang PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays becaue fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 380cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 400cf2e031SBarry Smith /* 410cf2e031SBarry Smith MatMult_SeqFFTW performs forward DFT 4297504071SAmlan Barua Input parameter: 433564f093SHong Zhang A - the matrix 4497504071SAmlan Barua x - the vector on which FDFT will be performed 4597504071SAmlan Barua 4697504071SAmlan Barua Output parameter: 4797504071SAmlan Barua y - vector that stores result of FDFT 4897504071SAmlan Barua */ 499371c9d4SSatish Balay PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) { 50b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 51b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 52f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 53f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 541acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 55a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 56a1b6d50cSHong Zhang fftw_iodim64 *iodims; 57a1b6d50cSHong Zhang #else 588c1d5ab3SHong Zhang fftw_iodim *iodims; 59a1b6d50cSHong Zhang #endif 601acd80e4SHong Zhang PetscInt i; 611acd80e4SHong Zhang #endif 621acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 63b2b77a04SHong Zhang 64b2b77a04SHong Zhang PetscFunctionBegin; 659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 669566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 676aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 68b2b77a04SHong Zhang switch (ndim) { 69b2b77a04SHong Zhang case 1: 7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 71b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7258a912c5SAmlan Barua #else 7358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7458a912c5SAmlan Barua #endif 75b2b77a04SHong Zhang break; 76b2b77a04SHong Zhang case 2: 7758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 78b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7958a912c5SAmlan Barua #else 8058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8158a912c5SAmlan Barua #endif 82b2b77a04SHong Zhang break; 83b2b77a04SHong Zhang case 3: 8458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 85b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 8658a912c5SAmlan Barua #else 8758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8858a912c5SAmlan Barua #endif 89b2b77a04SHong Zhang break; 90b2b77a04SHong Zhang default: 9158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 92a1b6d50cSHong Zhang iodims = fftw->iodims; 93a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 948c1d5ab3SHong Zhang if (ndim) { 95a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 968c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 978c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 98a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 998c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 1008c1d5ab3SHong Zhang } 1018c1d5ab3SHong Zhang } 102a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 103a1b6d50cSHong Zhang #else 104a1b6d50cSHong Zhang if (ndim) { 105a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 106a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 107a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 108a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 109a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 110a1b6d50cSHong Zhang } 111a1b6d50cSHong Zhang } 112a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 113a1b6d50cSHong Zhang #endif 114a1b6d50cSHong Zhang 11558a912c5SAmlan Barua #else 116a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang } 120f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 121b2b77a04SHong Zhang fftw->foutarray = y_array; 122b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 123b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 124b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 125b2b77a04SHong Zhang } else { /* use existing plan */ 126b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1277e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1297e4bc134SDominic Meiser #else 1307e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1317e4bc134SDominic Meiser #endif 132b2b77a04SHong Zhang } else { 133b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 134b2b77a04SHong Zhang } 135b2b77a04SHong Zhang } 1369566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 138b2b77a04SHong Zhang PetscFunctionReturn(0); 139b2b77a04SHong Zhang } 140b2b77a04SHong Zhang 14197504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 14297504071SAmlan Barua Input parameter: 1433564f093SHong Zhang A - the matrix 14497504071SAmlan Barua x - the vector on which BDFT will be performed 14597504071SAmlan Barua 14697504071SAmlan Barua Output parameter: 14797504071SAmlan Barua y - vector that stores result of BDFT 14897504071SAmlan Barua */ 14997504071SAmlan Barua 1509371c9d4SSatish Balay PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) { 151b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 152b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 153f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 154f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 155b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 1561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 157a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 158a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 159a1b6d50cSHong Zhang #else 160a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 161a1b6d50cSHong Zhang #endif 1621acd80e4SHong Zhang #endif 163b2b77a04SHong Zhang 164b2b77a04SHong Zhang PetscFunctionBegin; 1659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 1669566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1676aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 168b2b77a04SHong Zhang switch (ndim) { 169b2b77a04SHong Zhang case 1: 17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 171b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17258a912c5SAmlan Barua #else 173e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17458a912c5SAmlan Barua #endif 175b2b77a04SHong Zhang break; 176b2b77a04SHong Zhang case 2: 17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 178b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17958a912c5SAmlan Barua #else 180e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18158a912c5SAmlan Barua #endif 182b2b77a04SHong Zhang break; 183b2b77a04SHong Zhang case 3: 18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 185b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 18658a912c5SAmlan Barua #else 187e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18858a912c5SAmlan Barua #endif 189b2b77a04SHong Zhang break; 190b2b77a04SHong Zhang default: 19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 193a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 194a1b6d50cSHong Zhang #else 1958c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196a1b6d50cSHong Zhang #endif 19758a912c5SAmlan Barua #else 198a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 19958a912c5SAmlan Barua #endif 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang } 202f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 203b2b77a04SHong Zhang fftw->boutarray = y_array; 2042f613bf5SBarry Smith fftw_execute(fftw->p_backward); 205b2b77a04SHong Zhang } else { /* use existing plan */ 206b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2077e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 208b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 2097e4bc134SDominic Meiser #else 2107e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 2117e4bc134SDominic Meiser #endif 212b2b77a04SHong Zhang } else { 2132f613bf5SBarry Smith fftw_execute(fftw->p_backward); 214b2b77a04SHong Zhang } 215b2b77a04SHong Zhang } 2169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2179566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 218b2b77a04SHong Zhang PetscFunctionReturn(0); 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang 2210cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 22297504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22397504071SAmlan Barua Input parameter: 2243564f093SHong Zhang A - the matrix 22597504071SAmlan Barua x - the vector on which FDFT will be performed 22697504071SAmlan Barua 22797504071SAmlan Barua Output parameter: 22897504071SAmlan Barua y - vector that stores result of FDFT 22997504071SAmlan Barua */ 2309371c9d4SSatish Balay PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) { 231b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 232b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 233f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 234f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 235c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 236ce94432eSBarry Smith MPI_Comm comm; 237b2b77a04SHong Zhang 238b2b77a04SHong Zhang PetscFunctionBegin; 2399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 2419566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2426aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 243b2b77a04SHong Zhang switch (ndim) { 244b2b77a04SHong Zhang case 1: 2453c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 246b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 247ae0a50aaSHong Zhang #else 2484f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2493c3a9c75SAmlan Barua #endif 250b2b77a04SHong Zhang break; 251b2b77a04SHong Zhang case 2: 25297504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 253b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2543c3a9c75SAmlan Barua #else 2553c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2563c3a9c75SAmlan Barua #endif 257b2b77a04SHong Zhang break; 258b2b77a04SHong Zhang case 3: 2593c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 260b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2613c3a9c75SAmlan Barua #else 26251d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2633c3a9c75SAmlan Barua #endif 264b2b77a04SHong Zhang break; 265b2b77a04SHong Zhang default: 2663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 267c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2683c3a9c75SAmlan Barua #else 2693c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2703c3a9c75SAmlan Barua #endif 271b2b77a04SHong Zhang break; 272b2b77a04SHong Zhang } 273f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 274b2b77a04SHong Zhang fftw->foutarray = y_array; 275b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 276b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 277b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 278b2b77a04SHong Zhang } else { /* use existing plan */ 279b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 280b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 281b2b77a04SHong Zhang } else { 282b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 283b2b77a04SHong Zhang } 284b2b77a04SHong Zhang } 2859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 287b2b77a04SHong Zhang PetscFunctionReturn(0); 288b2b77a04SHong Zhang } 289b2b77a04SHong Zhang 2900cf2e031SBarry Smith /* 2910cf2e031SBarry Smith MatMultTranspose_MPIFFTW performs parallel backward DFT 29297504071SAmlan Barua Input parameter: 2933564f093SHong Zhang A - the matrix 29497504071SAmlan Barua x - the vector on which BDFT will be performed 29597504071SAmlan Barua 29697504071SAmlan Barua Output parameter: 29797504071SAmlan Barua y - vector that stores result of BDFT 29897504071SAmlan Barua */ 2999371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) { 300b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 301b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 302f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 303f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 304c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 305ce94432eSBarry Smith MPI_Comm comm; 306b2b77a04SHong Zhang 307b2b77a04SHong Zhang PetscFunctionBegin; 3089566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3099566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 3109566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 3116aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 312b2b77a04SHong Zhang switch (ndim) { 313b2b77a04SHong Zhang case 1: 3143c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 315b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 316ae0a50aaSHong Zhang #else 3174f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 3183c3a9c75SAmlan Barua #endif 319b2b77a04SHong Zhang break; 320b2b77a04SHong Zhang case 2: 32197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 322b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3233c3a9c75SAmlan Barua #else 3243c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3253c3a9c75SAmlan Barua #endif 326b2b77a04SHong Zhang break; 327b2b77a04SHong Zhang case 3: 3283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 329b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3303c3a9c75SAmlan Barua #else 3313c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3323c3a9c75SAmlan Barua #endif 333b2b77a04SHong Zhang break; 334b2b77a04SHong Zhang default: 3353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 336c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3373c3a9c75SAmlan Barua #else 3383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3393c3a9c75SAmlan Barua #endif 340b2b77a04SHong Zhang break; 341b2b77a04SHong Zhang } 342f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 343b2b77a04SHong Zhang fftw->boutarray = y_array; 3442f613bf5SBarry Smith fftw_execute(fftw->p_backward); 345b2b77a04SHong Zhang } else { /* use existing plan */ 346b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 347b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 348b2b77a04SHong Zhang } else { 3492f613bf5SBarry Smith fftw_execute(fftw->p_backward); 350b2b77a04SHong Zhang } 351b2b77a04SHong Zhang } 3529566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 354b2b77a04SHong Zhang PetscFunctionReturn(0); 355b2b77a04SHong Zhang } 3560cf2e031SBarry Smith #endif 357b2b77a04SHong Zhang 3589371c9d4SSatish Balay PetscErrorCode MatDestroy_FFTW(Mat A) { 359b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 360b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 361b2b77a04SHong Zhang 362b2b77a04SHong Zhang PetscFunctionBegin; 363b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 364b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 365ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims); 3669566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3679566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3682e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3692e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3702e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3710cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3726ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3730cf2e031SBarry Smith #endif 374b2b77a04SHong Zhang PetscFunctionReturn(0); 375b2b77a04SHong Zhang } 376b2b77a04SHong Zhang 3770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 378c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 3799371c9d4SSatish Balay PetscErrorCode VecDestroy_MPIFFTW(Vec v) { 380b2b77a04SHong Zhang PetscScalar *array; 381b2b77a04SHong Zhang 382b2b77a04SHong Zhang PetscFunctionBegin; 3839566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3842f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3859566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 3869566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 387b2b77a04SHong Zhang PetscFunctionReturn(0); 388b2b77a04SHong Zhang } 3890cf2e031SBarry Smith #endif 390b2b77a04SHong Zhang 3910cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3929371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) { 3935b113e21Ss-sajid-ali Mat A; 3945b113e21Ss-sajid-ali 3955b113e21Ss-sajid-ali PetscFunctionBegin; 3969566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 3979566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 3985b113e21Ss-sajid-ali PetscFunctionReturn(0); 3995b113e21Ss-sajid-ali } 4005b113e21Ss-sajid-ali 4019371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) { 4025b113e21Ss-sajid-ali Mat A; 4035b113e21Ss-sajid-ali 4045b113e21Ss-sajid-ali PetscFunctionBegin; 4059566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 4069566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 4075b113e21Ss-sajid-ali PetscFunctionReturn(0); 4085b113e21Ss-sajid-ali } 4095b113e21Ss-sajid-ali 4109371c9d4SSatish Balay static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) { 4115b113e21Ss-sajid-ali Mat A; 4125b113e21Ss-sajid-ali 4135b113e21Ss-sajid-ali PetscFunctionBegin; 4149566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 4159566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 4165b113e21Ss-sajid-ali PetscFunctionReturn(0); 4175b113e21Ss-sajid-ali } 4180cf2e031SBarry Smith #endif 4195b113e21Ss-sajid-ali 4204be45526SHong Zhang /*@ 4212a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 42211a5261eSBarry Smith parallel layout determined by `MATFFTW` 4234f7415efSAmlan Barua 4244f7415efSAmlan Barua Collective on Mat 4254f7415efSAmlan Barua 4264f7415efSAmlan Barua Input Parameter: 4273564f093SHong Zhang . A - the matrix 4284f7415efSAmlan Barua 429d8d19677SJose E. Roman Output Parameters: 430cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4316b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 432cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4334f7415efSAmlan Barua 4344f7415efSAmlan Barua Level: advanced 4353564f093SHong Zhang 43611a5261eSBarry Smith Notes: 43711a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input 43897504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 43997504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 44011a5261eSBarry Smith 44111a5261eSBarry Smith We need to provide enough space while doing parallel real transform. 44297504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 44397504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 444e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 445e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 446e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 44711a5261eSBarry Smith 448e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 449e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 450e0ec536eSAmlan Barua each processor and returns that. 4514f7415efSAmlan Barua 45211a5261eSBarry Smith Developer Note: 45311a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 45411a5261eSBarry Smith 45511a5261eSBarry Smith .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 4564be45526SHong Zhang @*/ 4579371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) { 4584be45526SHong Zhang PetscFunctionBegin; 459cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4604be45526SHong Zhang PetscFunctionReturn(0); 4614be45526SHong Zhang } 4624be45526SHong Zhang 4639371c9d4SSatish Balay PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) { 4644f7415efSAmlan Barua PetscMPIInt size, rank; 465ce94432eSBarry Smith MPI_Comm comm; 4664f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4674f7415efSAmlan Barua 4684f7415efSAmlan Barua PetscFunctionBegin; 4694f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4704f7415efSAmlan Barua PetscValidType(A, 1); 4719566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4724f7415efSAmlan Barua 4739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4754f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4764f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4779566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 4789566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 4799566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 4804f7415efSAmlan Barua #else 4819566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 4829566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 4839566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 4844f7415efSAmlan Barua #endif 4850cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 48697504071SAmlan Barua } else { /* parallel cases */ 4870cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 4880cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 4894f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 4909496c9aeSAmlan Barua ptrdiff_t local_n1; 4919496c9aeSAmlan Barua fftw_complex *data_fout; 4929496c9aeSAmlan Barua ptrdiff_t local_1_start; 4939496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4949496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 4959496c9aeSAmlan Barua #else 4964f7415efSAmlan Barua double *data_finr, *data_boutr; 4976ccf0b3eSHong Zhang PetscInt n1, N1; 4989496c9aeSAmlan Barua ptrdiff_t temp; 4999496c9aeSAmlan Barua #endif 5009496c9aeSAmlan Barua 5014f7415efSAmlan Barua switch (ndim) { 5024f7415efSAmlan Barua case 1: 50357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5044f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 50557625b84SAmlan Barua #else 50657625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 50757625b84SAmlan Barua if (fin) { 50857625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5099566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 5109566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5115b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 51257625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 51357625b84SAmlan Barua } 51457625b84SAmlan Barua if (fout) { 51557625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5169566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 5179566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5185b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 51957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52057625b84SAmlan Barua } 52157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 52257625b84SAmlan Barua if (bout) { 52357625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5249566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5259566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5265b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 52757625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 52857625b84SAmlan Barua } 52957625b84SAmlan Barua break; 53057625b84SAmlan Barua #endif 5314f7415efSAmlan Barua case 2: 53297504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5334f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5349371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 5359371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5364f7415efSAmlan Barua if (fin) { 5374f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5399566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5405b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5414f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5424f7415efSAmlan Barua } 54357625b84SAmlan Barua if (fout) { 54457625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5459566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5469566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5475b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 54857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 54957625b84SAmlan Barua } 5505b113e21Ss-sajid-ali if (bout) { 5515b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5529566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5539566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5545b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5555b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5565b113e21Ss-sajid-ali } 5574f7415efSAmlan Barua #else 5584f7415efSAmlan Barua /* Get local size */ 5594f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5604f7415efSAmlan Barua if (fin) { 5614f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5629566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5639566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5645b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5654f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5664f7415efSAmlan Barua } 5674f7415efSAmlan Barua if (fout) { 5684f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5699566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5709566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5715b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5724f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5734f7415efSAmlan Barua } 5744f7415efSAmlan Barua if (bout) { 5754f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5769566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5779566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5785b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5794f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5804f7415efSAmlan Barua } 5814f7415efSAmlan Barua #endif 5824f7415efSAmlan Barua break; 5834f7415efSAmlan Barua case 3: 5844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5854f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5869371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 5879371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 5884f7415efSAmlan Barua if (fin) { 5894f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5909566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5919566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5925b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5934f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5944f7415efSAmlan Barua } 5955b113e21Ss-sajid-ali if (fout) { 5965b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5979566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 5989566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5995b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6005b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6015b113e21Ss-sajid-ali } 6024f7415efSAmlan Barua if (bout) { 6034f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6049566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 6059566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6065b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6074f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6084f7415efSAmlan Barua } 6094f7415efSAmlan Barua #else 6100c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 6110c9b18e4SAmlan Barua if (fin) { 6120c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6139566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6149566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6155b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6160c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6170c9b18e4SAmlan Barua } 6180c9b18e4SAmlan Barua if (fout) { 6190c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6209566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6219566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6225b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6230c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6240c9b18e4SAmlan Barua } 6250c9b18e4SAmlan Barua if (bout) { 6260c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6279566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6289566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6295b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6300c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6310c9b18e4SAmlan Barua } 6324f7415efSAmlan Barua #endif 6334f7415efSAmlan Barua break; 6344f7415efSAmlan Barua default: 6354f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6364f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6374f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6384f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6390cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6404f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6414f7415efSAmlan Barua 6424f7415efSAmlan Barua if (fin) { 6434f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6449566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6459566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6465b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6474f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6484f7415efSAmlan Barua } 6495b113e21Ss-sajid-ali if (fout) { 6505b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6519566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6529566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6535b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6545b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6555b113e21Ss-sajid-ali } 6564f7415efSAmlan Barua if (bout) { 6574f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6589566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6599566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6605b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6619496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6624f7415efSAmlan Barua } 6634f7415efSAmlan Barua #else 6640c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6650c9b18e4SAmlan Barua if (fin) { 6660c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6679566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6689566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6695b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6700c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6710c9b18e4SAmlan Barua } 6720c9b18e4SAmlan Barua if (fout) { 6730c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6749566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6759566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6765b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6770c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6780c9b18e4SAmlan Barua } 6790c9b18e4SAmlan Barua if (bout) { 6800c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6819566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6829566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6835b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6840c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6850c9b18e4SAmlan Barua } 6864f7415efSAmlan Barua #endif 6874f7415efSAmlan Barua break; 6884f7415efSAmlan Barua } 689f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 690f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 691f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 692f9d91177SJunchao Zhang */ 693f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 694f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 695f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 6960cf2e031SBarry Smith #endif 6974f7415efSAmlan Barua } 6984f7415efSAmlan Barua PetscFunctionReturn(0); 6994f7415efSAmlan Barua } 7004f7415efSAmlan Barua 7013564f093SHong Zhang /*@ 70211a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 70354efbe56SHong Zhang 7043564f093SHong Zhang Collective on Mat 7053564f093SHong Zhang 7063564f093SHong Zhang Input Parameters: 7073564f093SHong Zhang + A - FFTW matrix 7083564f093SHong Zhang - x - the PETSc vector 7093564f093SHong Zhang 7103564f093SHong Zhang Output Parameters: 7113564f093SHong Zhang . y - the FFTW vector 7123564f093SHong Zhang 713b2b77a04SHong Zhang Level: intermediate 7143564f093SHong Zhang 71511a5261eSBarry Smith Note: 71611a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 71797504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 71897504071SAmlan Barua zeros. This routine does that job by scattering operation. 71997504071SAmlan Barua 72011a5261eSBarry Smith .seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 7213564f093SHong Zhang @*/ 7229371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) { 7233564f093SHong Zhang PetscFunctionBegin; 724cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7253564f093SHong Zhang PetscFunctionReturn(0); 7263564f093SHong Zhang } 7273c3a9c75SAmlan Barua 7289371c9d4SSatish Balay PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) { 729ce94432eSBarry Smith MPI_Comm comm; 7303c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7319496c9aeSAmlan Barua PetscInt low; 7327a21806cSHong Zhang PetscMPIInt rank, size; 7337a21806cSHong Zhang PetscInt vsize, vsize1; 7343c3a9c75SAmlan Barua VecScatter vecscat; 7350cf2e031SBarry Smith IS list1; 7363c3a9c75SAmlan Barua 7373564f093SHong Zhang PetscFunctionBegin; 7389566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7399566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7419566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7423c3a9c75SAmlan Barua 7433564f093SHong Zhang if (size == 1) { 7449566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7459566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7469566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7479566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7489566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7499566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7509566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7520cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7533564f093SHong Zhang } else { 7540cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7550cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7560cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7570cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7580cf2e031SBarry Smith IS list2; 7590cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7600cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7610cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7620cf2e031SBarry Smith PetscInt NM; 7630cf2e031SBarry Smith ptrdiff_t temp; 7640cf2e031SBarry Smith #endif 7650cf2e031SBarry Smith 7663c3a9c75SAmlan Barua switch (ndim) { 7673c3a9c75SAmlan Barua case 1: 76864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7697a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 77026fbe8dcSKarl Rupp 7719566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7729566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7739566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7759566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7769566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7779566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7789566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 77964657d84SAmlan Barua #else 780e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 78164657d84SAmlan Barua #endif 7823c3a9c75SAmlan Barua break; 7833c3a9c75SAmlan Barua case 2: 784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7857a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 78626fbe8dcSKarl Rupp 7879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 7889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 7899566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7929566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 795bd59e6c6SAmlan Barua #else 796c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 79726fbe8dcSKarl Rupp 7989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 7999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 8003c3a9c75SAmlan Barua 8013564f093SHong Zhang if (dim[1] % 2 == 0) { 8023c3a9c75SAmlan Barua NM = dim[1] + 2; 8033564f093SHong Zhang } else { 8043c3a9c75SAmlan Barua NM = dim[1] + 1; 8053564f093SHong Zhang } 8063c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 8073c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 8083c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 8093c3a9c75SAmlan Barua tempindx1 = i * NM + j; 81026fbe8dcSKarl Rupp 8115b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 8123c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 8133c3a9c75SAmlan Barua } 8143c3a9c75SAmlan Barua } 8153c3a9c75SAmlan Barua 8169566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 8179566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 8183c3a9c75SAmlan Barua 8199566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8209566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8219566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8229566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8259566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8269566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 827bd59e6c6SAmlan Barua #endif 8289496c9aeSAmlan Barua break; 8293c3a9c75SAmlan Barua 8303c3a9c75SAmlan Barua case 3: 831bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8327a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 83326fbe8dcSKarl Rupp 8349566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8359566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8369566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8379566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8389566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8399566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8409566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8419566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 842bd59e6c6SAmlan Barua #else 843c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 844f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8457a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 84626fbe8dcSKarl Rupp 8479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 8489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 84951d1eed7SAmlan Barua 850db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 851db4deed7SKarl Rupp else NM = dim[2] + 1; 85251d1eed7SAmlan Barua 85351d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 85451d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 85551d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 85651d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 85751d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 85826fbe8dcSKarl Rupp 85951d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 86051d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 86151d1eed7SAmlan Barua } 86251d1eed7SAmlan Barua } 86351d1eed7SAmlan Barua } 86451d1eed7SAmlan Barua 8659566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8669566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8679566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8689566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8699566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8709566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8739566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8749566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 875bd59e6c6SAmlan Barua #endif 8769496c9aeSAmlan Barua break; 8773c3a9c75SAmlan Barua 8783c3a9c75SAmlan Barua default: 879bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8807a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 88126fbe8dcSKarl Rupp 8829566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 8839566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 8849566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8869566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8879566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8889566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8899566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 890bd59e6c6SAmlan Barua #else 891c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 892f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 893e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 89426fbe8dcSKarl Rupp 895e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 89626fbe8dcSKarl Rupp 8977a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 89826fbe8dcSKarl Rupp 899e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 900e5d7f247SAmlan Barua 901e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 902e5d7f247SAmlan Barua 9039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 9049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 905e5d7f247SAmlan Barua 906db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 907db4deed7SKarl Rupp else NM = 1; 908e5d7f247SAmlan Barua 9096971673cSAmlan Barua j = low; 9103564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 9116971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 9126971673cSAmlan Barua indx2[i] = j; 91326fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 9146971673cSAmlan Barua j++; 9156971673cSAmlan Barua } 9169566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 9179566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 9189566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9199566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9209566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9219566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9249566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9259566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 926bd59e6c6SAmlan Barua #endif 9279496c9aeSAmlan Barua break; 9283c3a9c75SAmlan Barua } 9290cf2e031SBarry Smith #endif 930e81bb599SAmlan Barua } 9313564f093SHong Zhang PetscFunctionReturn(0); 9323c3a9c75SAmlan Barua } 9333c3a9c75SAmlan Barua 9343564f093SHong Zhang /*@ 93511a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 9363564f093SHong Zhang 9373564f093SHong Zhang Collective on Mat 9383564f093SHong Zhang 9393564f093SHong Zhang Input Parameters: 94011a5261eSBarry Smith + A - `MATFFTW` matrix 9413564f093SHong Zhang - x - FFTW vector 9423564f093SHong Zhang 9433564f093SHong Zhang Output Parameters: 9443564f093SHong Zhang . y - PETSc vector 9453564f093SHong Zhang 9463564f093SHong Zhang Level: intermediate 9473564f093SHong Zhang 94811a5261eSBarry Smith Note: 94911a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 95011a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros. 9513564f093SHong Zhang 95211a5261eSBarry Smith .seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 9533564f093SHong Zhang @*/ 9549371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) { 9553c3a9c75SAmlan Barua PetscFunctionBegin; 956cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9573c3a9c75SAmlan Barua PetscFunctionReturn(0); 9583c3a9c75SAmlan Barua } 95954efbe56SHong Zhang 9609371c9d4SSatish Balay PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) { 961ce94432eSBarry Smith MPI_Comm comm; 9625b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9639496c9aeSAmlan Barua PetscInt low; 9647a21806cSHong Zhang PetscMPIInt rank, size; 9655b3e41ffSAmlan Barua VecScatter vecscat; 9660cf2e031SBarry Smith IS list1; 9679496c9aeSAmlan Barua 9683564f093SHong Zhang PetscFunctionBegin; 9699566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9729566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9735b3e41ffSAmlan Barua 974e81bb599SAmlan Barua if (size == 1) { 9759566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 9769566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 9779566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9789566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9799566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 981e81bb599SAmlan Barua 9820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9833564f093SHong Zhang } else { 9840cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 9850cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 9860cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 9870cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 9880cf2e031SBarry Smith IS list2; 9890cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 9900cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 9910cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 9920cf2e031SBarry Smith PetscInt NM; 9930cf2e031SBarry Smith ptrdiff_t temp; 9940cf2e031SBarry Smith #endif 995e81bb599SAmlan Barua switch (ndim) { 996e81bb599SAmlan Barua case 1: 99764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9987a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 99926fbe8dcSKarl Rupp 10009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 10019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 10029566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10049566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10059566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10079566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 100864657d84SAmlan Barua #else 10096a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 101064657d84SAmlan Barua #endif 10115b3e41ffSAmlan Barua break; 10125b3e41ffSAmlan Barua case 2: 1013bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10147a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 101526fbe8dcSKarl Rupp 10169566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10179566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10189566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10199566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10209566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10219566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1024bd59e6c6SAmlan Barua #else 10257a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 102626fbe8dcSKarl Rupp 10279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 10289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 10295b3e41ffSAmlan Barua 1030db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1031db4deed7SKarl Rupp else NM = dim[1] + 1; 10325b3e41ffSAmlan Barua 10335b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10345b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10355b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10365b3e41ffSAmlan Barua tempindx1 = i * NM + j; 103726fbe8dcSKarl Rupp 10385b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10395b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10405b3e41ffSAmlan Barua } 10415b3e41ffSAmlan Barua } 10425b3e41ffSAmlan Barua 10439566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10449566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10455b3e41ffSAmlan Barua 10469566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10479566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10489566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10499566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10509566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10529566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10539566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1054bd59e6c6SAmlan Barua #endif 10559496c9aeSAmlan Barua break; 10565b3e41ffSAmlan Barua case 3: 1057bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10587a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 105926fbe8dcSKarl Rupp 10609566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10619566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10629566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10639566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10649566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10659566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1068bd59e6c6SAmlan Barua #else 10697a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 107026fbe8dcSKarl Rupp 10719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 10729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 107351d1eed7SAmlan Barua 1074db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1075db4deed7SKarl Rupp else NM = dim[2] + 1; 107651d1eed7SAmlan Barua 107751d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 107851d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 107951d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 108051d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 108151d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 108226fbe8dcSKarl Rupp 108351d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 108451d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 108551d1eed7SAmlan Barua } 108651d1eed7SAmlan Barua } 108751d1eed7SAmlan Barua } 108851d1eed7SAmlan Barua 10899566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 10909566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 109151d1eed7SAmlan Barua 10929566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10939566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10949566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10959566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10989566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10999566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1100bd59e6c6SAmlan Barua #endif 11019496c9aeSAmlan Barua break; 11025b3e41ffSAmlan Barua default: 1103bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11047a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 110526fbe8dcSKarl Rupp 11069566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 11079566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 11089566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 11099566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11109566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11119566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1114bd59e6c6SAmlan Barua #else 1115ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 111626fbe8dcSKarl Rupp 1117ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 111826fbe8dcSKarl Rupp 1119c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 112026fbe8dcSKarl Rupp 1121ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1122ba6e06d0SAmlan Barua 1123ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1124ba6e06d0SAmlan Barua 11259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 11269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1127ba6e06d0SAmlan Barua 1128db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1129db4deed7SKarl Rupp else NM = 1; 1130ba6e06d0SAmlan Barua 1131ba6e06d0SAmlan Barua j = low; 11323564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1133ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1134ba6e06d0SAmlan Barua indx2[i] = j; 11353564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1136ba6e06d0SAmlan Barua j++; 1137ba6e06d0SAmlan Barua } 11389566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11399566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1140ba6e06d0SAmlan Barua 11419566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11429566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11439566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11449566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11469566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11479566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11489566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1149bd59e6c6SAmlan Barua #endif 11509496c9aeSAmlan Barua break; 11515b3e41ffSAmlan Barua } 11520cf2e031SBarry Smith #endif 1153e81bb599SAmlan Barua } 11543564f093SHong Zhang PetscFunctionReturn(0); 11555b3e41ffSAmlan Barua } 11565b3e41ffSAmlan Barua 11573c3a9c75SAmlan Barua /* 11583564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11593564f093SHong Zhang 11603c3a9c75SAmlan Barua Options Database Keys: 11613c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11623c3a9c75SAmlan Barua 11633c3a9c75SAmlan Barua Level: intermediate 11643c3a9c75SAmlan Barua 11653c3a9c75SAmlan Barua */ 11669371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) { 1167ce94432eSBarry Smith MPI_Comm comm; 1168b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1169b2b77a04SHong Zhang Mat_FFTW *fftw; 11700cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11715274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11725274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1173b2b77a04SHong Zhang PetscBool flg; 11744f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 117511902ff2SHong Zhang PetscMPIInt size, rank; 11769496c9aeSAmlan Barua ptrdiff_t *pdim; 11779496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11780cf2e031SBarry Smith PetscInt tot_dim; 11799496c9aeSAmlan Barua #endif 11809496c9aeSAmlan Barua 1181b2b77a04SHong Zhang PetscFunctionBegin; 11829566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 11839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 11849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1185c92418ccSAmlan Barua 11860cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 11871878d478SAmlan Barua fftw_mpi_init(); 11880cf2e031SBarry Smith #endif 118911902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 119011902ff2SHong Zhang pdim[0] = dim[0]; 11918ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11928ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 11938ca4c034SAmlan Barua #endif 11943564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 11956882372aSHong Zhang partial_dim *= dim[ctr]; 119611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11978ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1198db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1199db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12008ca4c034SAmlan Barua #endif 12016882372aSHong Zhang } 12023c3a9c75SAmlan Barua 1203b2b77a04SHong Zhang if (size == 1) { 1204e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12059566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 12060cf2e031SBarry Smith fft->n = fft->N; 1207e81bb599SAmlan Barua #else 12089566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 12090cf2e031SBarry Smith fft->n = tot_dim; 12100cf2e031SBarry Smith #endif 12110cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12120cf2e031SBarry Smith } else { 12130cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 12140cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12150cf2e031SBarry Smith ptrdiff_t temp; 12160cf2e031SBarry Smith PetscInt N1; 1217e81bb599SAmlan Barua #endif 1218e81bb599SAmlan Barua 1219b2b77a04SHong Zhang switch (ndim) { 1220b2b77a04SHong Zhang case 1: 12213c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12223c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1223e5d7f247SAmlan Barua #else 12247a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12250cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12269566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1227e5d7f247SAmlan Barua #endif 1228b2b77a04SHong Zhang break; 1229b2b77a04SHong Zhang case 2: 12305b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12317a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12320cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12339566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12345b3e41ffSAmlan Barua #else 1235c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 123626fbe8dcSKarl Rupp 12370cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12395b3e41ffSAmlan Barua #endif 1240b2b77a04SHong Zhang break; 1241b2b77a04SHong Zhang case 3: 124251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12437a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 124426fbe8dcSKarl Rupp 12450cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12469566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 124751d1eed7SAmlan Barua #else 1248c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 124926fbe8dcSKarl Rupp 12500cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12519566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 125251d1eed7SAmlan Barua #endif 1253b2b77a04SHong Zhang break; 1254b2b77a04SHong Zhang default: 1255b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12567a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 125726fbe8dcSKarl Rupp 12580cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1260b3a17365SAmlan Barua #else 1261b3a17365SAmlan Barua temp = pdim[ndim - 1]; 126226fbe8dcSKarl Rupp 1263b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 126426fbe8dcSKarl Rupp 1265c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 126626fbe8dcSKarl Rupp 12670cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12680cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 126926fbe8dcSKarl Rupp 1270b3a17365SAmlan Barua pdim[ndim - 1] = temp; 127126fbe8dcSKarl Rupp 12729566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1273b3a17365SAmlan Barua #endif 1274b2b77a04SHong Zhang break; 1275b2b77a04SHong Zhang } 12760cf2e031SBarry Smith #endif 1277b2b77a04SHong Zhang } 12782277172eSMark Adams free(pdim); 12799566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 1280*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw)); 1281b2b77a04SHong Zhang fft->data = (void *)fftw; 1282b2b77a04SHong Zhang 12830c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1284e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 128526fbe8dcSKarl Rupp 12869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 12878c1d5ab3SHong Zhang if (size == 1) { 1288a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1289a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1290a1b6d50cSHong Zhang #else 12918c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1292a1b6d50cSHong Zhang #endif 12938c1d5ab3SHong Zhang } 129426fbe8dcSKarl Rupp 1295b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1296c92418ccSAmlan Barua 1297f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1298f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1299b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1300b2b77a04SHong Zhang 1301b2b77a04SHong Zhang if (size == 1) { 1302b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1303b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 13040cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1305b2b77a04SHong Zhang } else { 1306b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1307b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13080cf2e031SBarry Smith #endif 1309b2b77a04SHong Zhang } 1310b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1311b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13124a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 131326fbe8dcSKarl Rupp 13149566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 13159566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 13169566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1317b2b77a04SHong Zhang 1318b2b77a04SHong Zhang /* get runtime options */ 1319d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13209566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13215f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1322d0609cedSBarry Smith PetscOptionsEnd(); 1323b2b77a04SHong Zhang PetscFunctionReturn(0); 1324b2b77a04SHong Zhang } 1325