1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 90cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 110cf2e031SBarry Smith #else 120cf2e031SBarry Smith #include <fftw3.h> 130cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 320cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 330cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 340cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 380cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 400cf2e031SBarry Smith /* 410cf2e031SBarry Smith MatMult_SeqFFTW performs forward DFT 4297504071SAmlan Barua Input parameter: 433564f093SHong Zhang A - the matrix 4497504071SAmlan Barua x - the vector on which FDFT will be performed 4597504071SAmlan Barua 4697504071SAmlan Barua Output parameter: 4797504071SAmlan Barua y - vector that stores result of FDFT 4897504071SAmlan Barua */ 49b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 50b2b77a04SHong Zhang { 51b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 52b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 53f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 54f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 551acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 56a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 57a1b6d50cSHong Zhang fftw_iodim64 *iodims; 58a1b6d50cSHong Zhang #else 598c1d5ab3SHong Zhang fftw_iodim *iodims; 60a1b6d50cSHong Zhang #endif 611acd80e4SHong Zhang PetscInt i; 621acd80e4SHong Zhang #endif 631acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 64b2b77a04SHong Zhang 65b2b77a04SHong Zhang PetscFunctionBegin; 669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&x_array)); 679566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&y_array)); 68b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 69b2b77a04SHong Zhang switch (ndim) { 70b2b77a04SHong Zhang case 1: 7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 72b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7358a912c5SAmlan Barua #else 7458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7558a912c5SAmlan Barua #endif 76b2b77a04SHong Zhang break; 77b2b77a04SHong Zhang case 2: 7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 79b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8058a912c5SAmlan Barua #else 8158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8258a912c5SAmlan Barua #endif 83b2b77a04SHong Zhang break; 84b2b77a04SHong Zhang case 3: 8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 86b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8758a912c5SAmlan Barua #else 8858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8958a912c5SAmlan Barua #endif 90b2b77a04SHong Zhang break; 91b2b77a04SHong Zhang default: 9258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 93a1b6d50cSHong Zhang iodims = fftw->iodims; 94a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 958c1d5ab3SHong Zhang if (ndim) { 96a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 978c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 988c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 99a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 1008c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 1018c1d5ab3SHong Zhang } 1028c1d5ab3SHong Zhang } 103a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 104a1b6d50cSHong Zhang #else 105a1b6d50cSHong Zhang if (ndim) { 106a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 107a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 108a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 109a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 110a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 111a1b6d50cSHong Zhang } 112a1b6d50cSHong Zhang } 113a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 114a1b6d50cSHong Zhang #endif 115a1b6d50cSHong Zhang 11658a912c5SAmlan Barua #else 117a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11858a912c5SAmlan Barua #endif 119b2b77a04SHong Zhang break; 120b2b77a04SHong Zhang } 121f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 122b2b77a04SHong Zhang fftw->foutarray = y_array; 123b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 124b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 125b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 126b2b77a04SHong Zhang } else { /* use existing plan */ 127b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1287e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 129b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 1307e4bc134SDominic Meiser #else 1317e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 1327e4bc134SDominic Meiser #endif 133b2b77a04SHong Zhang } else { 134b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 135b2b77a04SHong Zhang } 136b2b77a04SHong Zhang } 1379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&y_array)); 1389566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&x_array)); 139b2b77a04SHong Zhang PetscFunctionReturn(0); 140b2b77a04SHong Zhang } 141b2b77a04SHong Zhang 14297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 14397504071SAmlan Barua Input parameter: 1443564f093SHong Zhang A - the matrix 14597504071SAmlan Barua x - the vector on which BDFT will be performed 14697504071SAmlan Barua 14797504071SAmlan Barua Output parameter: 14897504071SAmlan Barua y - vector that stores result of BDFT 14997504071SAmlan Barua */ 15097504071SAmlan Barua 151b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 154b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 155f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 156f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 157b2b77a04SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 1581acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 159a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 160a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 161a1b6d50cSHong Zhang #else 162a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 163a1b6d50cSHong Zhang #endif 1641acd80e4SHong Zhang #endif 165b2b77a04SHong Zhang 166b2b77a04SHong Zhang PetscFunctionBegin; 1679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&x_array)); 1689566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&y_array)); 169b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 170b2b77a04SHong Zhang switch (ndim) { 171b2b77a04SHong Zhang case 1: 17258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 173b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17458a912c5SAmlan Barua #else 175e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17658a912c5SAmlan Barua #endif 177b2b77a04SHong Zhang break; 178b2b77a04SHong Zhang case 2: 17958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 180b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18158a912c5SAmlan Barua #else 182e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18358a912c5SAmlan Barua #endif 184b2b77a04SHong Zhang break; 185b2b77a04SHong Zhang case 3: 18658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 187b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18858a912c5SAmlan Barua #else 189e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19058a912c5SAmlan Barua #endif 191b2b77a04SHong Zhang break; 192b2b77a04SHong Zhang default: 19358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 194a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 195a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 196a1b6d50cSHong Zhang #else 1978c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 198a1b6d50cSHong Zhang #endif 19958a912c5SAmlan Barua #else 200a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 20158a912c5SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang } 204f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 205b2b77a04SHong Zhang fftw->boutarray = y_array; 2062f613bf5SBarry Smith fftw_execute(fftw->p_backward); 207b2b77a04SHong Zhang } else { /* use existing plan */ 208b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2097e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 210b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 2117e4bc134SDominic Meiser #else 2127e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 2137e4bc134SDominic Meiser #endif 214b2b77a04SHong Zhang } else { 2152f613bf5SBarry Smith fftw_execute(fftw->p_backward); 216b2b77a04SHong Zhang } 217b2b77a04SHong Zhang } 2189566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&y_array)); 2199566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&x_array)); 220b2b77a04SHong Zhang PetscFunctionReturn(0); 221b2b77a04SHong Zhang } 222b2b77a04SHong Zhang 2230cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 22497504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22597504071SAmlan Barua Input parameter: 2263564f093SHong Zhang A - the matrix 22797504071SAmlan Barua x - the vector on which FDFT will be performed 22897504071SAmlan Barua 22997504071SAmlan Barua Output parameter: 23097504071SAmlan Barua y - vector that stores result of FDFT 23197504071SAmlan Barua */ 232b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 233b2b77a04SHong Zhang { 234b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 235b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 236f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 237f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 238c92418ccSAmlan Barua PetscInt ndim = fft->ndim,*dim = fft->dim; 239ce94432eSBarry Smith MPI_Comm comm; 240b2b77a04SHong Zhang 241b2b77a04SHong Zhang PetscFunctionBegin; 2429566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2439566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&x_array)); 2449566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&y_array)); 245b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 246b2b77a04SHong Zhang switch (ndim) { 247b2b77a04SHong Zhang case 1: 2483c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 249b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 250ae0a50aaSHong Zhang #else 2514f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2523c3a9c75SAmlan Barua #endif 253b2b77a04SHong Zhang break; 254b2b77a04SHong Zhang case 2: 25597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2573c3a9c75SAmlan Barua #else 2583c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2593c3a9c75SAmlan Barua #endif 260b2b77a04SHong Zhang break; 261b2b77a04SHong Zhang case 3: 2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 263b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2643c3a9c75SAmlan Barua #else 26551d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2663c3a9c75SAmlan Barua #endif 267b2b77a04SHong Zhang break; 268b2b77a04SHong Zhang default: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2713c3a9c75SAmlan Barua #else 2723c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang } 276f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 277b2b77a04SHong Zhang fftw->foutarray = y_array; 278b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 281b2b77a04SHong Zhang } else { /* use existing plan */ 282b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 284b2b77a04SHong Zhang } else { 285b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang } 2889566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&y_array)); 2899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&x_array)); 290b2b77a04SHong Zhang PetscFunctionReturn(0); 291b2b77a04SHong Zhang } 292b2b77a04SHong Zhang 2930cf2e031SBarry Smith /* 2940cf2e031SBarry Smith MatMultTranspose_MPIFFTW performs parallel backward DFT 29597504071SAmlan Barua Input parameter: 2963564f093SHong Zhang A - the matrix 29797504071SAmlan Barua x - the vector on which BDFT will be performed 29897504071SAmlan Barua 29997504071SAmlan Barua Output parameter: 30097504071SAmlan Barua y - vector that stores result of BDFT 30197504071SAmlan Barua */ 302b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 303b2b77a04SHong Zhang { 304b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 305b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 306f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 307f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 308c92418ccSAmlan Barua PetscInt ndim = fft->ndim,*dim = fft->dim; 309ce94432eSBarry Smith MPI_Comm comm; 310b2b77a04SHong Zhang 311b2b77a04SHong Zhang PetscFunctionBegin; 3129566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 3139566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x,&x_array)); 3149566063dSJacob Faibussowitsch PetscCall(VecGetArray(y,&y_array)); 315b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 316b2b77a04SHong Zhang switch (ndim) { 317b2b77a04SHong Zhang case 1: 3183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 319b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 320ae0a50aaSHong Zhang #else 3214f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3223c3a9c75SAmlan Barua #endif 323b2b77a04SHong Zhang break; 324b2b77a04SHong Zhang case 2: 32597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 326b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3273c3a9c75SAmlan Barua #else 3283c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3293c3a9c75SAmlan Barua #endif 330b2b77a04SHong Zhang break; 331b2b77a04SHong Zhang case 3: 3323c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 333b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3343c3a9c75SAmlan Barua #else 3353c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3363c3a9c75SAmlan Barua #endif 337b2b77a04SHong Zhang break; 338b2b77a04SHong Zhang default: 3393c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 340c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3413c3a9c75SAmlan Barua #else 3423c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3433c3a9c75SAmlan Barua #endif 344b2b77a04SHong Zhang break; 345b2b77a04SHong Zhang } 346f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 347b2b77a04SHong Zhang fftw->boutarray = y_array; 3482f613bf5SBarry Smith fftw_execute(fftw->p_backward); 349b2b77a04SHong Zhang } else { /* use existing plan */ 350b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 351b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 352b2b77a04SHong Zhang } else { 3532f613bf5SBarry Smith fftw_execute(fftw->p_backward); 354b2b77a04SHong Zhang } 355b2b77a04SHong Zhang } 3569566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y,&y_array)); 3579566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x,&x_array)); 358b2b77a04SHong Zhang PetscFunctionReturn(0); 359b2b77a04SHong Zhang } 3600cf2e031SBarry Smith #endif 361b2b77a04SHong Zhang 362b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 363b2b77a04SHong Zhang { 364b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 365b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 366b2b77a04SHong Zhang 367b2b77a04SHong Zhang PetscFunctionBegin; 368b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 369b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3708c1d5ab3SHong Zhang if (fftw->iodims) { 3718c1d5ab3SHong Zhang free(fftw->iodims); 3728c1d5ab3SHong Zhang } 3739566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw)); 3749566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data)); 3750cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3766ccf0b3eSHong Zhang fftw_mpi_cleanup(); 3770cf2e031SBarry Smith #endif 378b2b77a04SHong Zhang PetscFunctionReturn(0); 379b2b77a04SHong Zhang } 380b2b77a04SHong Zhang 3810cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 382c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 383b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 384b2b77a04SHong Zhang { 385b2b77a04SHong Zhang PetscScalar *array; 386b2b77a04SHong Zhang 387b2b77a04SHong Zhang PetscFunctionBegin; 3889566063dSJacob Faibussowitsch PetscCall(VecGetArray(v,&array)); 3892f613bf5SBarry Smith fftw_free((fftw_complex*)array); 3909566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v,&array)); 3919566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v)); 392b2b77a04SHong Zhang PetscFunctionReturn(0); 393b2b77a04SHong Zhang } 3940cf2e031SBarry Smith #endif 395b2b77a04SHong Zhang 3960cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3975b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 3985b113e21Ss-sajid-ali { 3995b113e21Ss-sajid-ali Mat A; 4005b113e21Ss-sajid-ali 4015b113e21Ss-sajid-ali PetscFunctionBegin; 4029566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A)); 4039566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL)); 4045b113e21Ss-sajid-ali PetscFunctionReturn(0); 4055b113e21Ss-sajid-ali } 4065b113e21Ss-sajid-ali 4075b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 4085b113e21Ss-sajid-ali { 4095b113e21Ss-sajid-ali Mat A; 4105b113e21Ss-sajid-ali 4115b113e21Ss-sajid-ali PetscFunctionBegin; 4129566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A)); 4139566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL)); 4145b113e21Ss-sajid-ali PetscFunctionReturn(0); 4155b113e21Ss-sajid-ali } 4165b113e21Ss-sajid-ali 4175b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 4185b113e21Ss-sajid-ali { 4195b113e21Ss-sajid-ali Mat A; 4205b113e21Ss-sajid-ali 4215b113e21Ss-sajid-ali PetscFunctionBegin; 4229566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A)); 4239566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new)); 4245b113e21Ss-sajid-ali PetscFunctionReturn(0); 4255b113e21Ss-sajid-ali } 4260cf2e031SBarry Smith #endif 4275b113e21Ss-sajid-ali 4284be45526SHong Zhang /*@ 4292a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 4304f7415efSAmlan Barua parallel layout determined by FFTW 4314f7415efSAmlan Barua 4324f7415efSAmlan Barua Collective on Mat 4334f7415efSAmlan Barua 4344f7415efSAmlan Barua Input Parameter: 4353564f093SHong Zhang . A - the matrix 4364f7415efSAmlan Barua 437d8d19677SJose E. Roman Output Parameters: 438cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4396b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 440cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4414f7415efSAmlan Barua 4424f7415efSAmlan Barua Level: advanced 4433564f093SHong Zhang 44497504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 44597504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 44697504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 44797504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 44897504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 44997504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 450e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 451e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 452e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 453e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 454e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 455e0ec536eSAmlan Barua each processor and returns that. 4564f7415efSAmlan Barua 45775f45d78SBarry Smith .seealso: MatCreateFFT() 4584be45526SHong Zhang @*/ 4592a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4604be45526SHong Zhang { 4614be45526SHong Zhang PetscFunctionBegin; 462*cac4c232SBarry Smith PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z)); 4634be45526SHong Zhang PetscFunctionReturn(0); 4644be45526SHong Zhang } 4654be45526SHong Zhang 4662a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4674f7415efSAmlan Barua { 4684f7415efSAmlan Barua PetscMPIInt size,rank; 469ce94432eSBarry Smith MPI_Comm comm; 4704f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4714f7415efSAmlan Barua 4724f7415efSAmlan Barua PetscFunctionBegin; 4734f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4744f7415efSAmlan Barua PetscValidType(A,1); 4759566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 4764f7415efSAmlan Barua 4779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 4789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4794f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4804f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4819566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin)); 4829566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout)); 4839566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout)); 4844f7415efSAmlan Barua #else 4859566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin)); 4869566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout)); 4879566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout)); 4884f7415efSAmlan Barua #endif 4890cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 49097504071SAmlan Barua } else { /* parallel cases */ 4910cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4920cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 4934f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4949496c9aeSAmlan Barua ptrdiff_t local_n1; 4959496c9aeSAmlan Barua fftw_complex *data_fout; 4969496c9aeSAmlan Barua ptrdiff_t local_1_start; 4979496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4989496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4999496c9aeSAmlan Barua #else 5004f7415efSAmlan Barua double *data_finr,*data_boutr; 5016ccf0b3eSHong Zhang PetscInt n1,N1; 5029496c9aeSAmlan Barua ptrdiff_t temp; 5039496c9aeSAmlan Barua #endif 5049496c9aeSAmlan Barua 5054f7415efSAmlan Barua switch (ndim) { 5064f7415efSAmlan Barua case 1: 50757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5084f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 50957625b84SAmlan Barua #else 51057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 51157625b84SAmlan Barua if (fin) { 51257625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5139566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin)); 5149566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 5155b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 51657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 51757625b84SAmlan Barua } 51857625b84SAmlan Barua if (fout) { 51957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5209566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout)); 5219566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 5225b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 52357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52457625b84SAmlan Barua } 52557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 52657625b84SAmlan Barua if (bout) { 52757625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5289566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout)); 5299566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 5305b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 53157625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 53257625b84SAmlan Barua } 53357625b84SAmlan Barua break; 53457625b84SAmlan Barua #endif 5354f7415efSAmlan Barua case 2: 53697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5374f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5384f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5394f7415efSAmlan Barua if (fin) { 5404f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 5419566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 5429566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 5435b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5444f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5454f7415efSAmlan Barua } 54657625b84SAmlan Barua if (fout) { 54757625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5489566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout)); 5499566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 5505b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 55157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55257625b84SAmlan Barua } 5535b113e21Ss-sajid-ali if (bout) { 5545b113e21Ss-sajid-ali data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 5559566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 5569566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 5575b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5585b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5595b113e21Ss-sajid-ali } 5604f7415efSAmlan Barua #else 5614f7415efSAmlan Barua /* Get local size */ 5624f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5634f7415efSAmlan Barua if (fin) { 5644f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5659566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 5669566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 5675b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5684f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5694f7415efSAmlan Barua } 5704f7415efSAmlan Barua if (fout) { 5714f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5729566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 5739566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 5745b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5754f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5764f7415efSAmlan Barua } 5774f7415efSAmlan Barua if (bout) { 5784f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5799566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 5809566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 5815b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5824f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5834f7415efSAmlan Barua } 5844f7415efSAmlan Barua #endif 5854f7415efSAmlan Barua break; 5864f7415efSAmlan Barua case 3: 5874f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5884f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5894f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5904f7415efSAmlan Barua if (fin) { 5914f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 5929566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 5939566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 5945b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5954f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5964f7415efSAmlan Barua } 5975b113e21Ss-sajid-ali if (fout) { 5985b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5999566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout)); 6009566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 6015b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6025b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6035b113e21Ss-sajid-ali } 6044f7415efSAmlan Barua if (bout) { 6054f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 6069566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 6079566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 6085b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6094f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6104f7415efSAmlan Barua } 6114f7415efSAmlan Barua #else 6120c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 6130c9b18e4SAmlan Barua if (fin) { 6140c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6159566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 6169566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 6175b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6180c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6190c9b18e4SAmlan Barua } 6200c9b18e4SAmlan Barua if (fout) { 6210c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6229566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 6239566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 6245b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6250c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6260c9b18e4SAmlan Barua } 6270c9b18e4SAmlan Barua if (bout) { 6280c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6299566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 6309566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 6315b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6320c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6330c9b18e4SAmlan Barua } 6344f7415efSAmlan Barua #endif 6354f7415efSAmlan Barua break; 6364f7415efSAmlan Barua default: 6374f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6384f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 6394f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 6404f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6410cf2e031SBarry Smith N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 6424f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 6434f7415efSAmlan Barua 6444f7415efSAmlan Barua if (fin) { 6454f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 6469566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin)); 6479566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 6485b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6494f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6504f7415efSAmlan Barua } 6515b113e21Ss-sajid-ali if (fout) { 6525b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6539566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout)); 6549566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 6555b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6565b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6575b113e21Ss-sajid-ali } 6584f7415efSAmlan Barua if (bout) { 6594f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 6609566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout)); 6619566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 6625b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6639496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6644f7415efSAmlan Barua } 6654f7415efSAmlan Barua #else 6660c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6670c9b18e4SAmlan Barua if (fin) { 6680c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6699566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 6709566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 6715b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6720c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6730c9b18e4SAmlan Barua } 6740c9b18e4SAmlan Barua if (fout) { 6750c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6769566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 6779566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 6785b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6790c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6800c9b18e4SAmlan Barua } 6810c9b18e4SAmlan Barua if (bout) { 6820c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 6849566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 6855b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6860c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6870c9b18e4SAmlan Barua } 6884f7415efSAmlan Barua #endif 6894f7415efSAmlan Barua break; 6904f7415efSAmlan Barua } 691f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 692f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 693f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 694f9d91177SJunchao Zhang */ 695f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 696f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 697f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 6980cf2e031SBarry Smith #endif 6994f7415efSAmlan Barua } 7004f7415efSAmlan Barua PetscFunctionReturn(0); 7014f7415efSAmlan Barua } 7024f7415efSAmlan Barua 7033564f093SHong Zhang /*@ 7043564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 70554efbe56SHong Zhang 7063564f093SHong Zhang Collective on Mat 7073564f093SHong Zhang 7083564f093SHong Zhang Input Parameters: 7093564f093SHong Zhang + A - FFTW matrix 7103564f093SHong Zhang - x - the PETSc vector 7113564f093SHong Zhang 7123564f093SHong Zhang Output Parameters: 7133564f093SHong Zhang . y - the FFTW vector 7143564f093SHong Zhang 715b2b77a04SHong Zhang Options Database Keys: 7163564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 717b2b77a04SHong Zhang 718b2b77a04SHong Zhang Level: intermediate 7193564f093SHong Zhang 72097504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 72197504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 72297504071SAmlan Barua zeros. This routine does that job by scattering operation. 72397504071SAmlan Barua 7243564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 7253564f093SHong Zhang @*/ 7263564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 7273564f093SHong Zhang { 7283564f093SHong Zhang PetscFunctionBegin; 729*cac4c232SBarry Smith PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y)); 7303564f093SHong Zhang PetscFunctionReturn(0); 7313564f093SHong Zhang } 7323c3a9c75SAmlan Barua 73374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 7343c3a9c75SAmlan Barua { 735ce94432eSBarry Smith MPI_Comm comm; 7363c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7379496c9aeSAmlan Barua PetscInt low; 7387a21806cSHong Zhang PetscMPIInt rank,size; 7397a21806cSHong Zhang PetscInt vsize,vsize1; 7403c3a9c75SAmlan Barua VecScatter vecscat; 7410cf2e031SBarry Smith IS list1; 7423c3a9c75SAmlan Barua 7433564f093SHong Zhang PetscFunctionBegin; 7449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 7459566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 7469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7479566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y,&low,NULL)); 7483c3a9c75SAmlan Barua 7493564f093SHong Zhang if (size==1) { 7509566063dSJacob Faibussowitsch PetscCall(VecGetSize(x,&vsize)); 7519566063dSJacob Faibussowitsch PetscCall(VecGetSize(y,&vsize1)); 7529566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1)); 7539566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat)); 7549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7569566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7579566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7580cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7593564f093SHong Zhang } else { 7600cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7610cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 7620cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start; 7630cf2e031SBarry Smith ptrdiff_t local_n1,local_1_start; 7640cf2e031SBarry Smith IS list2; 7650cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 7660cf2e031SBarry Smith PetscInt i,j,k,partial_dim; 7670cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 7680cf2e031SBarry Smith PetscInt NM; 7690cf2e031SBarry Smith ptrdiff_t temp; 7700cf2e031SBarry Smith #endif 7710cf2e031SBarry Smith 7723c3a9c75SAmlan Barua switch (ndim) { 7733c3a9c75SAmlan Barua case 1: 77464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7757a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 77626fbe8dcSKarl Rupp 7779566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1)); 7789566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0,low,1,&list2)); 7799566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 7809566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7819566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7829566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7839566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 7849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 78564657d84SAmlan Barua #else 786e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 78764657d84SAmlan Barua #endif 7883c3a9c75SAmlan Barua break; 7893c3a9c75SAmlan Barua case 2: 790bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7917a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 79226fbe8dcSKarl Rupp 7939566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1)); 7949566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2)); 7959566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 7969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7979566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 7989566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 7999566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 801bd59e6c6SAmlan Barua #else 802c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80326fbe8dcSKarl Rupp 8049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1)); 8059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2)); 8063c3a9c75SAmlan Barua 8073564f093SHong Zhang if (dim[1]%2==0) { 8083c3a9c75SAmlan Barua NM = dim[1]+2; 8093564f093SHong Zhang } else { 8103c3a9c75SAmlan Barua NM = dim[1]+1; 8113564f093SHong Zhang } 8123c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 8133c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 8143c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 8153c3a9c75SAmlan Barua tempindx1 = i*NM + j; 81626fbe8dcSKarl Rupp 8175b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8183c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 8193c3a9c75SAmlan Barua } 8203c3a9c75SAmlan Barua } 8213c3a9c75SAmlan Barua 8229566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1)); 8239566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2)); 8243c3a9c75SAmlan Barua 8259566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 8269566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8279566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8289566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8319566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8329566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 833bd59e6c6SAmlan Barua #endif 8349496c9aeSAmlan Barua break; 8353c3a9c75SAmlan Barua 8363c3a9c75SAmlan Barua case 3: 837bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8387a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 83926fbe8dcSKarl Rupp 8409566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1)); 8419566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2)); 8429566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 8439566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8449566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8459566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8469566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8479566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 848bd59e6c6SAmlan Barua #else 849c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 850f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 8517a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 85226fbe8dcSKarl Rupp 8539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1)); 8549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2)); 85551d1eed7SAmlan Barua 856db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 857db4deed7SKarl Rupp else NM = dim[2]+1; 85851d1eed7SAmlan Barua 85951d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 86051d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 86151d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 86251d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 86351d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 86426fbe8dcSKarl Rupp 86551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 86651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 86751d1eed7SAmlan Barua } 86851d1eed7SAmlan Barua } 86951d1eed7SAmlan Barua } 87051d1eed7SAmlan Barua 8719566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1)); 8729566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2)); 8739566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 8749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8759566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8769566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8779566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8789566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 8799566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 8809566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 881bd59e6c6SAmlan Barua #endif 8829496c9aeSAmlan Barua break; 8833c3a9c75SAmlan Barua 8843c3a9c75SAmlan Barua default: 885bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8867a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 88726fbe8dcSKarl Rupp 8889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1)); 8899566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2)); 8909566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 8919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 8939566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 8949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 8959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 896bd59e6c6SAmlan Barua #else 897c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 898f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 899e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 90026fbe8dcSKarl Rupp 901e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 90226fbe8dcSKarl Rupp 9037a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 90426fbe8dcSKarl Rupp 905e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 906e5d7f247SAmlan Barua 907e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 908e5d7f247SAmlan Barua 9099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1)); 9109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2)); 911e5d7f247SAmlan Barua 912db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 913db4deed7SKarl Rupp else NM = 1; 914e5d7f247SAmlan Barua 9156971673cSAmlan Barua j = low; 9163564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 9176971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 9186971673cSAmlan Barua indx2[i] = j; 91926fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 9206971673cSAmlan Barua j++; 9216971673cSAmlan Barua } 9229566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1)); 9239566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2)); 9249566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 9259566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 9269566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 9279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9289566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 9299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 9309566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 9319566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 932bd59e6c6SAmlan Barua #endif 9339496c9aeSAmlan Barua break; 9343c3a9c75SAmlan Barua } 9350cf2e031SBarry Smith #endif 936e81bb599SAmlan Barua } 9373564f093SHong Zhang PetscFunctionReturn(0); 9383c3a9c75SAmlan Barua } 9393c3a9c75SAmlan Barua 9403564f093SHong Zhang /*@ 9413564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 9423564f093SHong Zhang 9433564f093SHong Zhang Collective on Mat 9443564f093SHong Zhang 9453564f093SHong Zhang Input Parameters: 9463564f093SHong Zhang + A - FFTW matrix 9473564f093SHong Zhang - x - FFTW vector 9483564f093SHong Zhang 9493564f093SHong Zhang Output Parameters: 9503564f093SHong Zhang . y - PETSc vector 9513564f093SHong Zhang 9523564f093SHong Zhang Level: intermediate 9533564f093SHong Zhang 9543564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9553564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9563564f093SHong Zhang 9573564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9583564f093SHong Zhang @*/ 95974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9603c3a9c75SAmlan Barua { 9613c3a9c75SAmlan Barua PetscFunctionBegin; 962*cac4c232SBarry Smith PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y)); 9633c3a9c75SAmlan Barua PetscFunctionReturn(0); 9643c3a9c75SAmlan Barua } 96554efbe56SHong Zhang 96674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9675b3e41ffSAmlan Barua { 968ce94432eSBarry Smith MPI_Comm comm; 9695b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9709496c9aeSAmlan Barua PetscInt low; 9717a21806cSHong Zhang PetscMPIInt rank,size; 9725b3e41ffSAmlan Barua VecScatter vecscat; 9730cf2e031SBarry Smith IS list1; 9749496c9aeSAmlan Barua 9753564f093SHong Zhang PetscFunctionBegin; 9769566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 9779566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 9789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 9799566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x,&low,NULL)); 9805b3e41ffSAmlan Barua 981e81bb599SAmlan Barua if (size==1) { 9829566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,fft->N,0,1,&list1)); 9839566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat)); 9849566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 9859566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 9869566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 9879566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 988e81bb599SAmlan Barua 9890cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 9903564f093SHong Zhang } else { 9910cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9920cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 9930cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start; 9940cf2e031SBarry Smith ptrdiff_t local_n1,local_1_start; 9950cf2e031SBarry Smith IS list2; 9960cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 9970cf2e031SBarry Smith PetscInt i,j,k,partial_dim; 9980cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 9990cf2e031SBarry Smith PetscInt NM; 10000cf2e031SBarry Smith ptrdiff_t temp; 10010cf2e031SBarry Smith #endif 1002e81bb599SAmlan Barua switch (ndim) { 1003e81bb599SAmlan Barua case 1: 100464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10057a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 100626fbe8dcSKarl Rupp 10079566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1)); 10089566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n1,low,1,&list2)); 10099566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 10109566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10119566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10129566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 101564657d84SAmlan Barua #else 10166a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 101764657d84SAmlan Barua #endif 10185b3e41ffSAmlan Barua break; 10195b3e41ffSAmlan Barua case 2: 1020bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10217a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 102226fbe8dcSKarl Rupp 10239566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1)); 10249566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2)); 10259566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat)); 10269566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10279566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10289566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10299566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10309566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1031bd59e6c6SAmlan Barua #else 10327a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103326fbe8dcSKarl Rupp 10349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1)); 10359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2)); 10365b3e41ffSAmlan Barua 1037db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 1038db4deed7SKarl Rupp else NM = dim[1]+1; 10395b3e41ffSAmlan Barua 10405b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 10415b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 10425b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10435b3e41ffSAmlan Barua tempindx1 = i*NM + j; 104426fbe8dcSKarl Rupp 10455b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10465b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10475b3e41ffSAmlan Barua } 10485b3e41ffSAmlan Barua } 10495b3e41ffSAmlan Barua 10509566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1)); 10519566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2)); 10525b3e41ffSAmlan Barua 10539566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat)); 10549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10569566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10579566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10589566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 10599566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 10609566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1061bd59e6c6SAmlan Barua #endif 10629496c9aeSAmlan Barua break; 10635b3e41ffSAmlan Barua case 3: 1064bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10657a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 106626fbe8dcSKarl Rupp 10679566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1)); 10689566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2)); 10699566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 10709566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10719566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 10729566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 10739566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 10749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1075bd59e6c6SAmlan Barua #else 10767a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 107726fbe8dcSKarl Rupp 10789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1)); 10799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2)); 108051d1eed7SAmlan Barua 1081db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1082db4deed7SKarl Rupp else NM = dim[2]+1; 108351d1eed7SAmlan Barua 108451d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 108551d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 108651d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 108751d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 108851d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 108926fbe8dcSKarl Rupp 109051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 109151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 109251d1eed7SAmlan Barua } 109351d1eed7SAmlan Barua } 109451d1eed7SAmlan Barua } 109551d1eed7SAmlan Barua 10969566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1)); 10979566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2)); 109851d1eed7SAmlan Barua 10999566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat)); 11009566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11019566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11029566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11059566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11069566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1107bd59e6c6SAmlan Barua #endif 11089496c9aeSAmlan Barua break; 11095b3e41ffSAmlan Barua default: 1110bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11117a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 111226fbe8dcSKarl Rupp 11139566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1)); 11149566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2)); 11159566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat)); 11169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11189566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 1121bd59e6c6SAmlan Barua #else 1122ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 112326fbe8dcSKarl Rupp 1124ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 112526fbe8dcSKarl Rupp 1126c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 112726fbe8dcSKarl Rupp 1128ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1129ba6e06d0SAmlan Barua 1130ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1131ba6e06d0SAmlan Barua 11329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1)); 11339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2)); 1134ba6e06d0SAmlan Barua 1135db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1136db4deed7SKarl Rupp else NM = 1; 1137ba6e06d0SAmlan Barua 1138ba6e06d0SAmlan Barua j = low; 11393564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1140ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1141ba6e06d0SAmlan Barua indx2[i] = j; 11423564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1143ba6e06d0SAmlan Barua j++; 1144ba6e06d0SAmlan Barua } 11459566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1)); 11469566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2)); 1147ba6e06d0SAmlan Barua 11489566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat)); 11499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 11519566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat)); 11529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1)); 11539566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2)); 11549566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1)); 11559566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2)); 1156bd59e6c6SAmlan Barua #endif 11579496c9aeSAmlan Barua break; 11585b3e41ffSAmlan Barua } 11590cf2e031SBarry Smith #endif 1160e81bb599SAmlan Barua } 11613564f093SHong Zhang PetscFunctionReturn(0); 11625b3e41ffSAmlan Barua } 11635b3e41ffSAmlan Barua 11643c3a9c75SAmlan Barua /* 11653564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11663564f093SHong Zhang 11673c3a9c75SAmlan Barua Options Database Keys: 11683c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11693c3a9c75SAmlan Barua 11703c3a9c75SAmlan Barua Level: intermediate 11713c3a9c75SAmlan Barua 11723c3a9c75SAmlan Barua */ 11738cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1174b2b77a04SHong Zhang { 1175ce94432eSBarry Smith MPI_Comm comm; 1176b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 1177b2b77a04SHong Zhang Mat_FFTW *fftw; 11780cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 11795274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11805274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1181b2b77a04SHong Zhang PetscBool flg; 11824f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 118311902ff2SHong Zhang PetscMPIInt size,rank; 11849496c9aeSAmlan Barua ptrdiff_t *pdim; 11855f80ce2aSJacob Faibussowitsch PetscErrorCode ierr; 11869496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11870cf2e031SBarry Smith PetscInt tot_dim; 11889496c9aeSAmlan Barua #endif 11899496c9aeSAmlan Barua 1190b2b77a04SHong Zhang PetscFunctionBegin; 11919566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 11929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 11939566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1194c92418ccSAmlan Barua 11950cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 11961878d478SAmlan Barua fftw_mpi_init(); 11970cf2e031SBarry Smith #endif 119811902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 119911902ff2SHong Zhang pdim[0] = dim[0]; 12008ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12018ca4c034SAmlan Barua tot_dim = 2*dim[0]; 12028ca4c034SAmlan Barua #endif 12033564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 12046882372aSHong Zhang partial_dim *= dim[ctr]; 120511902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12068ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1207db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1208db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12098ca4c034SAmlan Barua #endif 12106882372aSHong Zhang } 12113c3a9c75SAmlan Barua 1212b2b77a04SHong Zhang if (size == 1) { 1213e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12149566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N)); 12150cf2e031SBarry Smith fft->n = fft->N; 1216e81bb599SAmlan Barua #else 12179566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim)); 12180cf2e031SBarry Smith fft->n = tot_dim; 12190cf2e031SBarry Smith #endif 12200cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12210cf2e031SBarry Smith } else { 12220cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start,local_n1,local_1_start; 12230cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 12240cf2e031SBarry Smith ptrdiff_t temp; 12250cf2e031SBarry Smith PetscInt N1; 1226e81bb599SAmlan Barua #endif 1227e81bb599SAmlan Barua 1228b2b77a04SHong Zhang switch (ndim) { 1229b2b77a04SHong Zhang case 1: 12303c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12313c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1232e5d7f247SAmlan Barua #else 12337a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 12340cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 12359566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N)); 1236e5d7f247SAmlan Barua #endif 1237b2b77a04SHong Zhang break; 1238b2b77a04SHong Zhang case 2: 12395b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12407a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 12410cf2e031SBarry Smith fft->n = (PetscInt)local_n0*dim[1]; 12429566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 12435b3e41ffSAmlan Barua #else 1244c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 124526fbe8dcSKarl Rupp 12460cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1); 12479566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1))); 12485b3e41ffSAmlan Barua #endif 1249b2b77a04SHong Zhang break; 1250b2b77a04SHong Zhang case 3: 125151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12527a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 125326fbe8dcSKarl Rupp 12540cf2e031SBarry Smith fft->n = (PetscInt)local_n0*dim[1]*dim[2]; 12559566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 125651d1eed7SAmlan Barua #else 1257c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 125826fbe8dcSKarl Rupp 12590cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 12609566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1))); 126151d1eed7SAmlan Barua #endif 1262b2b77a04SHong Zhang break; 1263b2b77a04SHong Zhang default: 1264b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12657a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 126626fbe8dcSKarl Rupp 12670cf2e031SBarry Smith fft->n = (PetscInt)local_n0*partial_dim; 12689566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 1269b3a17365SAmlan Barua #else 1270b3a17365SAmlan Barua temp = pdim[ndim-1]; 127126fbe8dcSKarl Rupp 1272b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 127326fbe8dcSKarl Rupp 1274c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 127526fbe8dcSKarl Rupp 12760cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 12770cf2e031SBarry Smith N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 127826fbe8dcSKarl Rupp 1279b3a17365SAmlan Barua pdim[ndim-1] = temp; 128026fbe8dcSKarl Rupp 12819566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1)); 1282b3a17365SAmlan Barua #endif 1283b2b77a04SHong Zhang break; 1284b2b77a04SHong Zhang } 12850cf2e031SBarry Smith #endif 1286b2b77a04SHong Zhang } 12872277172eSMark Adams free(pdim); 12889566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW)); 12899566063dSJacob Faibussowitsch PetscCall(PetscNewLog(A,&fftw)); 1290b2b77a04SHong Zhang fft->data = (void*)fftw; 1291b2b77a04SHong Zhang 12920c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1293e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 129426fbe8dcSKarl Rupp 12959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 12968c1d5ab3SHong Zhang if (size == 1) { 1297a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1298a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1299a1b6d50cSHong Zhang #else 13008c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1301a1b6d50cSHong Zhang #endif 13028c1d5ab3SHong Zhang } 130326fbe8dcSKarl Rupp 1304b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1305c92418ccSAmlan Barua 1306f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1307f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1308b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1309b2b77a04SHong Zhang 1310b2b77a04SHong Zhang if (size == 1) { 1311b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1312b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 13130cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1314b2b77a04SHong Zhang } else { 1315b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1316b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 13170cf2e031SBarry Smith #endif 1318b2b77a04SHong Zhang } 1319b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1320b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13214a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 132226fbe8dcSKarl Rupp 13239566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW)); 13249566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW)); 13259566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW)); 1326b2b77a04SHong Zhang 1327b2b77a04SHong Zhang /* get runtime options */ 13289566063dSJacob Faibussowitsch ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");PetscCall(ierr); 13299566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg)); 13305f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag]; 13319566063dSJacob Faibussowitsch ierr = PetscOptionsEnd();PetscCall(ierr); 1332b2b77a04SHong Zhang PetscFunctionReturn(0); 1333b2b77a04SHong Zhang } 1334