1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 110cf2e031SBarry Smith #else 120cf2e031SBarry Smith #include <fftw3.h> 130cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw, *dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward, p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26b2b77a04SHong Zhang PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays becaue fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 380cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 400cf2e031SBarry Smith /* 410cf2e031SBarry Smith MatMult_SeqFFTW performs forward DFT 4297504071SAmlan Barua Input parameter: 433564f093SHong Zhang A - the matrix 4497504071SAmlan Barua x - the vector on which FDFT will be performed 4597504071SAmlan Barua 4697504071SAmlan Barua Output parameter: 4797504071SAmlan Barua y - vector that stores result of FDFT 4897504071SAmlan Barua */ 49d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 50d71ae5a4SJacob Faibussowitsch { 51b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 52b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 53f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 54f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 551acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 56a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 57a1b6d50cSHong Zhang fftw_iodim64 *iodims; 58a1b6d50cSHong Zhang #else 598c1d5ab3SHong Zhang fftw_iodim *iodims; 60a1b6d50cSHong Zhang #endif 611acd80e4SHong Zhang PetscInt i; 621acd80e4SHong Zhang #endif 631acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 64b2b77a04SHong Zhang 65b2b77a04SHong Zhang PetscFunctionBegin; 669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 679566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 686aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 69b2b77a04SHong Zhang switch (ndim) { 70b2b77a04SHong Zhang case 1: 7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 72b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 7358a912c5SAmlan Barua #else 7458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 7558a912c5SAmlan Barua #endif 76b2b77a04SHong Zhang break; 77b2b77a04SHong Zhang case 2: 7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 79b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 8058a912c5SAmlan Barua #else 8158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8258a912c5SAmlan Barua #endif 83b2b77a04SHong Zhang break; 84b2b77a04SHong Zhang case 3: 8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 86b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 8758a912c5SAmlan Barua #else 8858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 8958a912c5SAmlan Barua #endif 90b2b77a04SHong Zhang break; 91b2b77a04SHong Zhang default: 9258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 93a1b6d50cSHong Zhang iodims = fftw->iodims; 94a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 958c1d5ab3SHong Zhang if (ndim) { 96a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 978c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 988c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) { 99a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 1008c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 1018c1d5ab3SHong Zhang } 1028c1d5ab3SHong Zhang } 103a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 104a1b6d50cSHong Zhang #else 105a1b6d50cSHong Zhang if (ndim) { 106a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1]; 107a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 108a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) { 109a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 110a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 111a1b6d50cSHong Zhang } 112a1b6d50cSHong Zhang } 113a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 114a1b6d50cSHong Zhang #endif 115a1b6d50cSHong Zhang 11658a912c5SAmlan Barua #else 117a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 11858a912c5SAmlan Barua #endif 119b2b77a04SHong Zhang break; 120b2b77a04SHong Zhang } 121f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 122b2b77a04SHong Zhang fftw->foutarray = y_array; 123b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 124b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 125b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 126b2b77a04SHong Zhang } else { /* use existing plan */ 127b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1287e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 129b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 1307e4bc134SDominic Meiser #else 1317e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 1327e4bc134SDominic Meiser #endif 133b2b77a04SHong Zhang } else { 134b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 135b2b77a04SHong Zhang } 136b2b77a04SHong Zhang } 1379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 1389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 139b2b77a04SHong Zhang PetscFunctionReturn(0); 140b2b77a04SHong Zhang } 141b2b77a04SHong Zhang 14297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 14397504071SAmlan Barua Input parameter: 1443564f093SHong Zhang A - the matrix 14597504071SAmlan Barua x - the vector on which BDFT will be performed 14697504071SAmlan Barua 14797504071SAmlan Barua Output parameter: 14897504071SAmlan Barua y - vector that stores result of BDFT 14997504071SAmlan Barua */ 15097504071SAmlan Barua 151d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 152d71ae5a4SJacob Faibussowitsch { 153b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 154b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 155f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 156f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 157b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim; 1581acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 159a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 160a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 161a1b6d50cSHong Zhang #else 162a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 163a1b6d50cSHong Zhang #endif 1641acd80e4SHong Zhang #endif 165b2b77a04SHong Zhang 166b2b77a04SHong Zhang PetscFunctionBegin; 1679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 1689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 1696aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 170b2b77a04SHong Zhang switch (ndim) { 171b2b77a04SHong Zhang case 1: 17258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 173b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 17458a912c5SAmlan Barua #else 175e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 17658a912c5SAmlan Barua #endif 177b2b77a04SHong Zhang break; 178b2b77a04SHong Zhang case 2: 17958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 180b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 18158a912c5SAmlan Barua #else 182e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 18358a912c5SAmlan Barua #endif 184b2b77a04SHong Zhang break; 185b2b77a04SHong Zhang case 3: 18658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 187b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 18858a912c5SAmlan Barua #else 189e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 19058a912c5SAmlan Barua #endif 191b2b77a04SHong Zhang break; 192b2b77a04SHong Zhang default: 19358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 194a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 195a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196a1b6d50cSHong Zhang #else 1978c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 198a1b6d50cSHong Zhang #endif 19958a912c5SAmlan Barua #else 200a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 20158a912c5SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang } 204f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 205b2b77a04SHong Zhang fftw->boutarray = y_array; 2062f613bf5SBarry Smith fftw_execute(fftw->p_backward); 207b2b77a04SHong Zhang } else { /* use existing plan */ 208b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2097e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 210b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 2117e4bc134SDominic Meiser #else 2127e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 2137e4bc134SDominic Meiser #endif 214b2b77a04SHong Zhang } else { 2152f613bf5SBarry Smith fftw_execute(fftw->p_backward); 216b2b77a04SHong Zhang } 217b2b77a04SHong Zhang } 2189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 220b2b77a04SHong Zhang PetscFunctionReturn(0); 221b2b77a04SHong Zhang } 222b2b77a04SHong Zhang 2230cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 22497504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22597504071SAmlan Barua Input parameter: 2263564f093SHong Zhang A - the matrix 22797504071SAmlan Barua x - the vector on which FDFT will be performed 22897504071SAmlan Barua 22997504071SAmlan Barua Output parameter: 23097504071SAmlan Barua y - vector that stores result of FDFT 23197504071SAmlan Barua */ 232d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 233d71ae5a4SJacob Faibussowitsch { 234b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 235b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 236f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 237f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 238c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 239ce94432eSBarry Smith MPI_Comm comm; 240b2b77a04SHong Zhang 241b2b77a04SHong Zhang PetscFunctionBegin; 2429566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 2449566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 2456aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */ 246b2b77a04SHong Zhang switch (ndim) { 247b2b77a04SHong Zhang case 1: 2483c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 249b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 250ae0a50aaSHong Zhang #else 2514f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 2523c3a9c75SAmlan Barua #endif 253b2b77a04SHong Zhang break; 254b2b77a04SHong Zhang case 2: 25597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2573c3a9c75SAmlan Barua #else 2583c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2593c3a9c75SAmlan Barua #endif 260b2b77a04SHong Zhang break; 261b2b77a04SHong Zhang case 3: 2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 263b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2643c3a9c75SAmlan Barua #else 26551d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2663c3a9c75SAmlan Barua #endif 267b2b77a04SHong Zhang break; 268b2b77a04SHong Zhang default: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 2713c3a9c75SAmlan Barua #else 2723c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang } 276f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array; 277b2b77a04SHong Zhang fftw->foutarray = y_array; 278b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 281b2b77a04SHong Zhang } else { /* use existing plan */ 282b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 284b2b77a04SHong Zhang } else { 285b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang } 2889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 2899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 290b2b77a04SHong Zhang PetscFunctionReturn(0); 291b2b77a04SHong Zhang } 292b2b77a04SHong Zhang 2930cf2e031SBarry Smith /* 2940cf2e031SBarry Smith MatMultTranspose_MPIFFTW performs parallel backward DFT 29597504071SAmlan Barua Input parameter: 2963564f093SHong Zhang A - the matrix 29797504071SAmlan Barua x - the vector on which BDFT will be performed 29897504071SAmlan Barua 29997504071SAmlan Barua Output parameter: 30097504071SAmlan Barua y - vector that stores result of BDFT 30197504071SAmlan Barua */ 302d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 303d71ae5a4SJacob Faibussowitsch { 304b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 305b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 306f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 307f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 308c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim; 309ce94432eSBarry Smith MPI_Comm comm; 310b2b77a04SHong Zhang 311b2b77a04SHong Zhang PetscFunctionBegin; 3129566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 3139566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array)); 3149566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array)); 3156aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */ 316b2b77a04SHong Zhang switch (ndim) { 317b2b77a04SHong Zhang case 1: 3183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 319b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 320ae0a50aaSHong Zhang #else 3214f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 3223c3a9c75SAmlan Barua #endif 323b2b77a04SHong Zhang break; 324b2b77a04SHong Zhang case 2: 32597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 326b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3273c3a9c75SAmlan Barua #else 3283c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3293c3a9c75SAmlan Barua #endif 330b2b77a04SHong Zhang break; 331b2b77a04SHong Zhang case 3: 3323c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 333b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3343c3a9c75SAmlan Barua #else 3353c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3363c3a9c75SAmlan Barua #endif 337b2b77a04SHong Zhang break; 338b2b77a04SHong Zhang default: 3393c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 340c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 3413c3a9c75SAmlan Barua #else 3423c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 3433c3a9c75SAmlan Barua #endif 344b2b77a04SHong Zhang break; 345b2b77a04SHong Zhang } 346f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array; 347b2b77a04SHong Zhang fftw->boutarray = y_array; 3482f613bf5SBarry Smith fftw_execute(fftw->p_backward); 349b2b77a04SHong Zhang } else { /* use existing plan */ 350b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 351b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 352b2b77a04SHong Zhang } else { 3532f613bf5SBarry Smith fftw_execute(fftw->p_backward); 354b2b77a04SHong Zhang } 355b2b77a04SHong Zhang } 3569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array)); 3579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array)); 358b2b77a04SHong Zhang PetscFunctionReturn(0); 359b2b77a04SHong Zhang } 3600cf2e031SBarry Smith #endif 361b2b77a04SHong Zhang 362d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_FFTW(Mat A) 363d71ae5a4SJacob Faibussowitsch { 364b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 365b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 366b2b77a04SHong Zhang 367b2b77a04SHong Zhang PetscFunctionBegin; 368b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 369b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 370ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims); 3719566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3729566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3732e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 3742e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 3752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 3760cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3776ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3780cf2e031SBarry Smith #endif 379b2b77a04SHong Zhang PetscFunctionReturn(0); 380b2b77a04SHong Zhang } 381b2b77a04SHong Zhang 3820cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 383c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384d71ae5a4SJacob Faibussowitsch PetscErrorCode VecDestroy_MPIFFTW(Vec v) 385d71ae5a4SJacob Faibussowitsch { 386b2b77a04SHong Zhang PetscScalar *array; 387b2b77a04SHong Zhang 388b2b77a04SHong Zhang PetscFunctionBegin; 3899566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array)); 3902f613bf5SBarry Smith fftw_free((fftw_complex *)array); 3919566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array)); 3929566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 393b2b77a04SHong Zhang PetscFunctionReturn(0); 394b2b77a04SHong Zhang } 3950cf2e031SBarry Smith #endif 396b2b77a04SHong Zhang 3970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 398d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 399d71ae5a4SJacob Faibussowitsch { 4005b113e21Ss-sajid-ali Mat A; 4015b113e21Ss-sajid-ali 4025b113e21Ss-sajid-ali PetscFunctionBegin; 4039566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 4049566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 4055b113e21Ss-sajid-ali PetscFunctionReturn(0); 4065b113e21Ss-sajid-ali } 4075b113e21Ss-sajid-ali 408d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 409d71ae5a4SJacob Faibussowitsch { 4105b113e21Ss-sajid-ali Mat A; 4115b113e21Ss-sajid-ali 4125b113e21Ss-sajid-ali PetscFunctionBegin; 4139566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 4149566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 4155b113e21Ss-sajid-ali PetscFunctionReturn(0); 4165b113e21Ss-sajid-ali } 4175b113e21Ss-sajid-ali 418d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 419d71ae5a4SJacob Faibussowitsch { 4205b113e21Ss-sajid-ali Mat A; 4215b113e21Ss-sajid-ali 4225b113e21Ss-sajid-ali PetscFunctionBegin; 4239566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 4249566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 4255b113e21Ss-sajid-ali PetscFunctionReturn(0); 4265b113e21Ss-sajid-ali } 4270cf2e031SBarry Smith #endif 4285b113e21Ss-sajid-ali 4294be45526SHong Zhang /*@ 4302a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 43111a5261eSBarry Smith parallel layout determined by `MATFFTW` 4324f7415efSAmlan Barua 433*c3339decSBarry Smith Collective 4344f7415efSAmlan Barua 4354f7415efSAmlan Barua Input Parameter: 4363564f093SHong Zhang . A - the matrix 4374f7415efSAmlan Barua 438d8d19677SJose E. Roman Output Parameters: 439cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4406b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 441cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4424f7415efSAmlan Barua 4434f7415efSAmlan Barua Level: advanced 4443564f093SHong Zhang 44511a5261eSBarry Smith Notes: 44611a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input 44797504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 44897504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 44911a5261eSBarry Smith 45011a5261eSBarry Smith We need to provide enough space while doing parallel real transform. 45197504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 45297504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 453e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 454e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 455e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 45611a5261eSBarry Smith 457e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 458e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 459e0ec536eSAmlan Barua each processor and returns that. 4604f7415efSAmlan Barua 46111a5261eSBarry Smith Developer Note: 46211a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 46311a5261eSBarry Smith 46411a5261eSBarry Smith .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 4654be45526SHong Zhang @*/ 466d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 467d71ae5a4SJacob Faibussowitsch { 4684be45526SHong Zhang PetscFunctionBegin; 469cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 4704be45526SHong Zhang PetscFunctionReturn(0); 4714be45526SHong Zhang } 4724be45526SHong Zhang 473d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 474d71ae5a4SJacob Faibussowitsch { 4754f7415efSAmlan Barua PetscMPIInt size, rank; 476ce94432eSBarry Smith MPI_Comm comm; 4774f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 4784f7415efSAmlan Barua 4794f7415efSAmlan Barua PetscFunctionBegin; 4804f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4814f7415efSAmlan Barua PetscValidType(A, 1); 4829566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 4834f7415efSAmlan Barua 4849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4864f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4874f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4889566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 4899566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 4909566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 4914f7415efSAmlan Barua #else 4929566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 4939566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 4949566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 4954f7415efSAmlan Barua #endif 4960cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 49797504071SAmlan Barua } else { /* parallel cases */ 4980cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 4990cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 5004f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start; 5019496c9aeSAmlan Barua ptrdiff_t local_n1; 5029496c9aeSAmlan Barua fftw_complex *data_fout; 5039496c9aeSAmlan Barua ptrdiff_t local_1_start; 5049496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 5059496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout; 5069496c9aeSAmlan Barua #else 5074f7415efSAmlan Barua double *data_finr, *data_boutr; 5086ccf0b3eSHong Zhang PetscInt n1, N1; 5099496c9aeSAmlan Barua ptrdiff_t temp; 5109496c9aeSAmlan Barua #endif 5119496c9aeSAmlan Barua 5124f7415efSAmlan Barua switch (ndim) { 5134f7415efSAmlan Barua case 1: 51457625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5154f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 51657625b84SAmlan Barua #else 51757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 51857625b84SAmlan Barua if (fin) { 51957625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5209566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 5219566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5225b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 52357625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 52457625b84SAmlan Barua } 52557625b84SAmlan Barua if (fout) { 52657625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5279566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 5289566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5295b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 53057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53157625b84SAmlan Barua } 53257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 53357625b84SAmlan Barua if (bout) { 53457625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5359566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 5369566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5375b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 53857625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 53957625b84SAmlan Barua } 54057625b84SAmlan Barua break; 54157625b84SAmlan Barua #endif 5424f7415efSAmlan Barua case 2: 54397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5444f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5459371c9d4SSatish Balay N1 = 2 * dim[0] * (dim[1] / 2 + 1); 5469371c9d4SSatish Balay n1 = 2 * local_n0 * (dim[1] / 2 + 1); 5474f7415efSAmlan Barua if (fin) { 5484f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5499566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 5509566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5515b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5524f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5534f7415efSAmlan Barua } 55457625b84SAmlan Barua if (fout) { 55557625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5569566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 5579566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5585b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 55957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56057625b84SAmlan Barua } 5615b113e21Ss-sajid-ali if (bout) { 5625b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 5639566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 5649566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5655b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5665b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5675b113e21Ss-sajid-ali } 5684f7415efSAmlan Barua #else 5694f7415efSAmlan Barua /* Get local size */ 5704f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 5714f7415efSAmlan Barua if (fin) { 5724f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5739566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 5749566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 5755b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5764f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5774f7415efSAmlan Barua } 5784f7415efSAmlan Barua if (fout) { 5794f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5809566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 5819566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 5825b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5834f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5844f7415efSAmlan Barua } 5854f7415efSAmlan Barua if (bout) { 5864f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 5879566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 5889566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 5895b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5904f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5914f7415efSAmlan Barua } 5924f7415efSAmlan Barua #endif 5934f7415efSAmlan Barua break; 5944f7415efSAmlan Barua case 3: 5954f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5964f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 5979371c9d4SSatish Balay N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 5989371c9d4SSatish Balay n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 5994f7415efSAmlan Barua if (fin) { 6004f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6019566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 6029566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6035b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6044f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6054f7415efSAmlan Barua } 6065b113e21Ss-sajid-ali if (fout) { 6075b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6089566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 6099566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6105b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6115b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6125b113e21Ss-sajid-ali } 6134f7415efSAmlan Barua if (bout) { 6144f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6159566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 6169566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6175b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6184f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6194f7415efSAmlan Barua } 6204f7415efSAmlan Barua #else 6210c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 6220c9b18e4SAmlan Barua if (fin) { 6230c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6249566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6259566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6265b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6270c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6280c9b18e4SAmlan Barua } 6290c9b18e4SAmlan Barua if (fout) { 6300c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6319566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6329566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6335b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6340c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6350c9b18e4SAmlan Barua } 6360c9b18e4SAmlan Barua if (bout) { 6370c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6389566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6399566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6405b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6410c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6420c9b18e4SAmlan Barua } 6434f7415efSAmlan Barua #endif 6444f7415efSAmlan Barua break; 6454f7415efSAmlan Barua default: 6464f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6474f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 6484f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 6494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 6500cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 6514f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 6524f7415efSAmlan Barua 6534f7415efSAmlan Barua if (fin) { 6544f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6559566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 6569566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6575b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6584f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6594f7415efSAmlan Barua } 6605b113e21Ss-sajid-ali if (fout) { 6615b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6629566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 6639566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6645b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6655b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6665b113e21Ss-sajid-ali } 6674f7415efSAmlan Barua if (bout) { 6684f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 6699566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 6709566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6715b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6729496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6734f7415efSAmlan Barua } 6744f7415efSAmlan Barua #else 6750c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 6760c9b18e4SAmlan Barua if (fin) { 6770c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6789566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 6799566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 6805b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6810c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6820c9b18e4SAmlan Barua } 6830c9b18e4SAmlan Barua if (fout) { 6840c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6859566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 6869566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 6875b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6880c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6890c9b18e4SAmlan Barua } 6900c9b18e4SAmlan Barua if (bout) { 6910c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 6929566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 6939566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 6945b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6950c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6960c9b18e4SAmlan Barua } 6974f7415efSAmlan Barua #endif 6984f7415efSAmlan Barua break; 6994f7415efSAmlan Barua } 700f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 701f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 702f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 703f9d91177SJunchao Zhang */ 704f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 705f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 706f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 7070cf2e031SBarry Smith #endif 7084f7415efSAmlan Barua } 7094f7415efSAmlan Barua PetscFunctionReturn(0); 7104f7415efSAmlan Barua } 7114f7415efSAmlan Barua 7123564f093SHong Zhang /*@ 71311a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 71454efbe56SHong Zhang 715*c3339decSBarry Smith Collective 7163564f093SHong Zhang 7173564f093SHong Zhang Input Parameters: 7183564f093SHong Zhang + A - FFTW matrix 7193564f093SHong Zhang - x - the PETSc vector 7203564f093SHong Zhang 7213564f093SHong Zhang Output Parameters: 7223564f093SHong Zhang . y - the FFTW vector 7233564f093SHong Zhang 724b2b77a04SHong Zhang Level: intermediate 7253564f093SHong Zhang 72611a5261eSBarry Smith Note: 72711a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 72897504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 72997504071SAmlan Barua zeros. This routine does that job by scattering operation. 73097504071SAmlan Barua 73111a5261eSBarry Smith .seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 7323564f093SHong Zhang @*/ 733d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 734d71ae5a4SJacob Faibussowitsch { 7353564f093SHong Zhang PetscFunctionBegin; 736cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 7373564f093SHong Zhang PetscFunctionReturn(0); 7383564f093SHong Zhang } 7393c3a9c75SAmlan Barua 740d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 741d71ae5a4SJacob Faibussowitsch { 742ce94432eSBarry Smith MPI_Comm comm; 7433c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 7449496c9aeSAmlan Barua PetscInt low; 7457a21806cSHong Zhang PetscMPIInt rank, size; 7467a21806cSHong Zhang PetscInt vsize, vsize1; 7473c3a9c75SAmlan Barua VecScatter vecscat; 7480cf2e031SBarry Smith IS list1; 7493c3a9c75SAmlan Barua 7503564f093SHong Zhang PetscFunctionBegin; 7519566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 7529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7539566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7549566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL)); 7553c3a9c75SAmlan Barua 7563564f093SHong Zhang if (size == 1) { 7579566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize)); 7589566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1)); 7599566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 7609566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 7619566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7629566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7639566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7649566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7650cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7663564f093SHong Zhang } else { 7670cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 7680cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 7690cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 7700cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 7710cf2e031SBarry Smith IS list2; 7720cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7730cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 7740cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7750cf2e031SBarry Smith PetscInt NM; 7760cf2e031SBarry Smith ptrdiff_t temp; 7770cf2e031SBarry Smith #endif 7780cf2e031SBarry Smith 7793c3a9c75SAmlan Barua switch (ndim) { 7803c3a9c75SAmlan Barua case 1: 78164657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7827a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 78326fbe8dcSKarl Rupp 7849566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 7859566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 7869566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 7879566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7889566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 7899566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7909566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 79264657d84SAmlan Barua #else 793e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 79464657d84SAmlan Barua #endif 7953c3a9c75SAmlan Barua break; 7963c3a9c75SAmlan Barua case 2: 797bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7987a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 79926fbe8dcSKarl Rupp 8009566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 8019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 8029566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8049566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8059566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8079566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 808bd59e6c6SAmlan Barua #else 809c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 81026fbe8dcSKarl Rupp 8119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 8129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 8133c3a9c75SAmlan Barua 8143564f093SHong Zhang if (dim[1] % 2 == 0) { 8153c3a9c75SAmlan Barua NM = dim[1] + 2; 8163564f093SHong Zhang } else { 8173c3a9c75SAmlan Barua NM = dim[1] + 1; 8183564f093SHong Zhang } 8193c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) { 8203c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) { 8213c3a9c75SAmlan Barua tempindx = i * dim[1] + j; 8223c3a9c75SAmlan Barua tempindx1 = i * NM + j; 82326fbe8dcSKarl Rupp 8245b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 8253c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1; 8263c3a9c75SAmlan Barua } 8273c3a9c75SAmlan Barua } 8283c3a9c75SAmlan Barua 8299566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 8309566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 8313c3a9c75SAmlan Barua 8329566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8339566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8349566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8359566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8369566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8389566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8399566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 840bd59e6c6SAmlan Barua #endif 8419496c9aeSAmlan Barua break; 8423c3a9c75SAmlan Barua 8433c3a9c75SAmlan Barua case 3: 844bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8457a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 84626fbe8dcSKarl Rupp 8479566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 8489566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 8499566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8509566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8519566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8529566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8539566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8549566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 855bd59e6c6SAmlan Barua #else 856c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 857f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 8587a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 85926fbe8dcSKarl Rupp 8609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 8619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 86251d1eed7SAmlan Barua 863db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 864db4deed7SKarl Rupp else NM = dim[2] + 1; 86551d1eed7SAmlan Barua 86651d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 86751d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 86851d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 86951d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 87051d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 87126fbe8dcSKarl Rupp 87251d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 87351d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 87451d1eed7SAmlan Barua } 87551d1eed7SAmlan Barua } 87651d1eed7SAmlan Barua } 87751d1eed7SAmlan Barua 8789566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 8799566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 8809566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8839566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8859566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8869566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8879566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 888bd59e6c6SAmlan Barua #endif 8899496c9aeSAmlan Barua break; 8903c3a9c75SAmlan Barua 8913c3a9c75SAmlan Barua default: 892bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8937a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 89426fbe8dcSKarl Rupp 8959566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 8969566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 8979566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 8989566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 8999566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9009566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9029566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 903bd59e6c6SAmlan Barua #else 904c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 905f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 906e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 90726fbe8dcSKarl Rupp 908e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 90926fbe8dcSKarl Rupp 9107a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 91126fbe8dcSKarl Rupp 912e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 913e5d7f247SAmlan Barua 914e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 915e5d7f247SAmlan Barua 9169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 9179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 918e5d7f247SAmlan Barua 919db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 920db4deed7SKarl Rupp else NM = 1; 921e5d7f247SAmlan Barua 9226971673cSAmlan Barua j = low; 9233564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 9246971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i; 9256971673cSAmlan Barua indx2[i] = j; 92626fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM; 9276971673cSAmlan Barua j++; 9286971673cSAmlan Barua } 9299566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 9309566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 9319566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 9329566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9339566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9349566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9359566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9369566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9379566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9389566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 939bd59e6c6SAmlan Barua #endif 9409496c9aeSAmlan Barua break; 9413c3a9c75SAmlan Barua } 9420cf2e031SBarry Smith #endif 943e81bb599SAmlan Barua } 9443564f093SHong Zhang PetscFunctionReturn(0); 9453c3a9c75SAmlan Barua } 9463c3a9c75SAmlan Barua 9473564f093SHong Zhang /*@ 94811a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 9493564f093SHong Zhang 950*c3339decSBarry Smith Collective 9513564f093SHong Zhang 9523564f093SHong Zhang Input Parameters: 95311a5261eSBarry Smith + A - `MATFFTW` matrix 9543564f093SHong Zhang - x - FFTW vector 9553564f093SHong Zhang 9563564f093SHong Zhang Output Parameters: 9573564f093SHong Zhang . y - PETSc vector 9583564f093SHong Zhang 9593564f093SHong Zhang Level: intermediate 9603564f093SHong Zhang 96111a5261eSBarry Smith Note: 96211a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 96311a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros. 9643564f093SHong Zhang 96511a5261eSBarry Smith .seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 9663564f093SHong Zhang @*/ 967d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 968d71ae5a4SJacob Faibussowitsch { 9693c3a9c75SAmlan Barua PetscFunctionBegin; 970cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 9713c3a9c75SAmlan Barua PetscFunctionReturn(0); 9723c3a9c75SAmlan Barua } 97354efbe56SHong Zhang 974d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 975d71ae5a4SJacob Faibussowitsch { 976ce94432eSBarry Smith MPI_Comm comm; 9775b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data; 9789496c9aeSAmlan Barua PetscInt low; 9797a21806cSHong Zhang PetscMPIInt rank, size; 9805b3e41ffSAmlan Barua VecScatter vecscat; 9810cf2e031SBarry Smith IS list1; 9829496c9aeSAmlan Barua 9833564f093SHong Zhang PetscFunctionBegin; 9849566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9879566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL)); 9885b3e41ffSAmlan Barua 989e81bb599SAmlan Barua if (size == 1) { 9909566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 9919566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 9929566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9939566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 9949566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 996e81bb599SAmlan Barua 9970cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9983564f093SHong Zhang } else { 9990cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 10000cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 10010cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start; 10020cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start; 10030cf2e031SBarry Smith IS list2; 10040cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 10050cf2e031SBarry Smith PetscInt i, j, k, partial_dim; 10060cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 10070cf2e031SBarry Smith PetscInt NM; 10080cf2e031SBarry Smith ptrdiff_t temp; 10090cf2e031SBarry Smith #endif 1010e81bb599SAmlan Barua switch (ndim) { 1011e81bb599SAmlan Barua case 1: 101264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10137a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 101426fbe8dcSKarl Rupp 10159566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 10169566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 10179566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10209566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 102364657d84SAmlan Barua #else 10246a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 102564657d84SAmlan Barua #endif 10265b3e41ffSAmlan Barua break; 10275b3e41ffSAmlan Barua case 2: 1028bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10297a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 103026fbe8dcSKarl Rupp 10319566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 10329566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 10339566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10349566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10359566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10369566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10379566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10389566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1039bd59e6c6SAmlan Barua #else 10407a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 104126fbe8dcSKarl Rupp 10429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 10439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 10445b3e41ffSAmlan Barua 1045db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2; 1046db4deed7SKarl Rupp else NM = dim[1] + 1; 10475b3e41ffSAmlan Barua 10485b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) { 10495b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) { 10505b3e41ffSAmlan Barua tempindx = i * dim[1] + j; 10515b3e41ffSAmlan Barua tempindx1 = i * NM + j; 105226fbe8dcSKarl Rupp 10535b3e41ffSAmlan Barua indx1[tempindx] = local_0_start * dim[1] + tempindx; 10545b3e41ffSAmlan Barua indx2[tempindx] = low + tempindx1; 10555b3e41ffSAmlan Barua } 10565b3e41ffSAmlan Barua } 10575b3e41ffSAmlan Barua 10589566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 10599566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 10605b3e41ffSAmlan Barua 10619566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 10629566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10639566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10649566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10659566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10679566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10689566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1069bd59e6c6SAmlan Barua #endif 10709496c9aeSAmlan Barua break; 10715b3e41ffSAmlan Barua case 3: 1072bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10737a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 107426fbe8dcSKarl Rupp 10759566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 10769566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 10779566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 10789566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10799566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 10809566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10819566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10829566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1083bd59e6c6SAmlan Barua #else 10847a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 108526fbe8dcSKarl Rupp 10869566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 10879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 108851d1eed7SAmlan Barua 1089db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2; 1090db4deed7SKarl Rupp else NM = dim[2] + 1; 109151d1eed7SAmlan Barua 109251d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) { 109351d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) { 109451d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) { 109551d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 109651d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k; 109726fbe8dcSKarl Rupp 109851d1eed7SAmlan Barua indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 109951d1eed7SAmlan Barua indx2[tempindx] = low + tempindx1; 110051d1eed7SAmlan Barua } 110151d1eed7SAmlan Barua } 110251d1eed7SAmlan Barua } 110351d1eed7SAmlan Barua 11049566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 11059566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 110651d1eed7SAmlan Barua 11079566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11089566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11099566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11109566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11139566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11149566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1115bd59e6c6SAmlan Barua #endif 11169496c9aeSAmlan Barua break; 11175b3e41ffSAmlan Barua default: 1118bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11197a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 112026fbe8dcSKarl Rupp 11219566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 11229566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 11239566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 11249566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11259566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11269566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11279566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11289566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1129bd59e6c6SAmlan Barua #else 1130ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 113126fbe8dcSKarl Rupp 1132ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 113326fbe8dcSKarl Rupp 1134c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 113526fbe8dcSKarl Rupp 1136ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1137ba6e06d0SAmlan Barua 1138ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1139ba6e06d0SAmlan Barua 11409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 11419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1142ba6e06d0SAmlan Barua 1143db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2; 1144db4deed7SKarl Rupp else NM = 1; 1145ba6e06d0SAmlan Barua 1146ba6e06d0SAmlan Barua j = low; 11473564f093SHong Zhang for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1148ba6e06d0SAmlan Barua indx1[i] = local_0_start * partial_dim + i; 1149ba6e06d0SAmlan Barua indx2[i] = j; 11503564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM; 1151ba6e06d0SAmlan Barua j++; 1152ba6e06d0SAmlan Barua } 11539566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 11549566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1155ba6e06d0SAmlan Barua 11569566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 11579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11589566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 11599566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11609566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11629566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11639566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1164bd59e6c6SAmlan Barua #endif 11659496c9aeSAmlan Barua break; 11665b3e41ffSAmlan Barua } 11670cf2e031SBarry Smith #endif 1168e81bb599SAmlan Barua } 11693564f093SHong Zhang PetscFunctionReturn(0); 11705b3e41ffSAmlan Barua } 11715b3e41ffSAmlan Barua 11723c3a9c75SAmlan Barua /* 11733564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11743564f093SHong Zhang 11753c3a9c75SAmlan Barua Options Database Keys: 11763c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11773c3a9c75SAmlan Barua 11783c3a9c75SAmlan Barua Level: intermediate 11793c3a9c75SAmlan Barua 11803c3a9c75SAmlan Barua */ 1181d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1182d71ae5a4SJacob Faibussowitsch { 1183ce94432eSBarry Smith MPI_Comm comm; 1184b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data; 1185b2b77a04SHong Zhang Mat_FFTW *fftw; 11860cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim; 11875274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 11885274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1189b2b77a04SHong Zhang PetscBool flg; 11904f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr; 119111902ff2SHong Zhang PetscMPIInt size, rank; 11929496c9aeSAmlan Barua ptrdiff_t *pdim; 11939496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11940cf2e031SBarry Smith PetscInt tot_dim; 11959496c9aeSAmlan Barua #endif 11969496c9aeSAmlan Barua 1197b2b77a04SHong Zhang PetscFunctionBegin; 11989566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 11999566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 12009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1201c92418ccSAmlan Barua 12020cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12031878d478SAmlan Barua fftw_mpi_init(); 12040cf2e031SBarry Smith #endif 120511902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 120611902ff2SHong Zhang pdim[0] = dim[0]; 12078ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12088ca4c034SAmlan Barua tot_dim = 2 * dim[0]; 12098ca4c034SAmlan Barua #endif 12103564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) { 12116882372aSHong Zhang partial_dim *= dim[ctr]; 121211902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12138ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1214db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1215db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12168ca4c034SAmlan Barua #endif 12176882372aSHong Zhang } 12183c3a9c75SAmlan Barua 1219b2b77a04SHong Zhang if (size == 1) { 1220e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12219566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 12220cf2e031SBarry Smith fft->n = fft->N; 1223e81bb599SAmlan Barua #else 12249566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 12250cf2e031SBarry Smith fft->n = tot_dim; 12260cf2e031SBarry Smith #endif 12270cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12280cf2e031SBarry Smith } else { 12290cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 12300cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12310cf2e031SBarry Smith ptrdiff_t temp; 12320cf2e031SBarry Smith PetscInt N1; 1233e81bb599SAmlan Barua #endif 1234e81bb599SAmlan Barua 1235b2b77a04SHong Zhang switch (ndim) { 1236b2b77a04SHong Zhang case 1: 12373c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12383c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1239e5d7f247SAmlan Barua #else 12407a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 12410cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12429566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1243e5d7f247SAmlan Barua #endif 1244b2b77a04SHong Zhang break; 1245b2b77a04SHong Zhang case 2: 12465b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12477a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 12480cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1]; 12499566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 12505b3e41ffSAmlan Barua #else 1251c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 125226fbe8dcSKarl Rupp 12530cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 12549566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 12555b3e41ffSAmlan Barua #endif 1256b2b77a04SHong Zhang break; 1257b2b77a04SHong Zhang case 3: 125851d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12597a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 126026fbe8dcSKarl Rupp 12610cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 12629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 126351d1eed7SAmlan Barua #else 1264c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 126526fbe8dcSKarl Rupp 12660cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 12679566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 126851d1eed7SAmlan Barua #endif 1269b2b77a04SHong Zhang break; 1270b2b77a04SHong Zhang default: 1271b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12727a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 127326fbe8dcSKarl Rupp 12740cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * partial_dim; 12759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1276b3a17365SAmlan Barua #else 1277b3a17365SAmlan Barua temp = pdim[ndim - 1]; 127826fbe8dcSKarl Rupp 1279b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1; 128026fbe8dcSKarl Rupp 1281c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 128226fbe8dcSKarl Rupp 12830cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 12840cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 128526fbe8dcSKarl Rupp 1286b3a17365SAmlan Barua pdim[ndim - 1] = temp; 128726fbe8dcSKarl Rupp 12889566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1289b3a17365SAmlan Barua #endif 1290b2b77a04SHong Zhang break; 1291b2b77a04SHong Zhang } 12920cf2e031SBarry Smith #endif 1293b2b77a04SHong Zhang } 12942277172eSMark Adams free(pdim); 12959566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 12964dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw)); 1297b2b77a04SHong Zhang fft->data = (void *)fftw; 1298b2b77a04SHong Zhang 12990c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1300e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 130126fbe8dcSKarl Rupp 13029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 13038c1d5ab3SHong Zhang if (size == 1) { 1304a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1305a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1306a1b6d50cSHong Zhang #else 13078c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1308a1b6d50cSHong Zhang #endif 13098c1d5ab3SHong Zhang } 131026fbe8dcSKarl Rupp 1311b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1312c92418ccSAmlan Barua 1313f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1314f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1315b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1316b2b77a04SHong Zhang 1317b2b77a04SHong Zhang if (size == 1) { 1318b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1319b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 13200cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1321b2b77a04SHong Zhang } else { 1322b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1323b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13240cf2e031SBarry Smith #endif 1325b2b77a04SHong Zhang } 1326b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1327b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13284a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 132926fbe8dcSKarl Rupp 13309566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 13319566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 13329566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1333b2b77a04SHong Zhang 1334b2b77a04SHong Zhang /* get runtime options */ 1335d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 13369566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 13375f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 1338d0609cedSBarry Smith PetscOptionsEnd(); 1339b2b77a04SHong Zhang PetscFunctionReturn(0); 1340b2b77a04SHong Zhang } 1341