1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua 3097504071SAmlan Barua Input parameter: 3197504071SAmlan Barua mat - the matrix 3297504071SAmlan Barua x - the vector on which FDFT will be performed 3397504071SAmlan Barua 3497504071SAmlan Barua Output parameter: 3597504071SAmlan Barua y - vector that stores result of FDFT 3697504071SAmlan Barua 3797504071SAmlan Barua */ 38b2b77a04SHong Zhang #undef __FUNCT__ 39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41b2b77a04SHong Zhang { 42b2b77a04SHong Zhang PetscErrorCode ierr; 43b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 44b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 46b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 47b2b77a04SHong Zhang 48b2b77a04SHong Zhang PetscFunctionBegin; 49b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 52b2b77a04SHong Zhang switch (ndim){ 53b2b77a04SHong Zhang case 1: 5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5658a912c5SAmlan Barua #else 5758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5858a912c5SAmlan Barua #endif 59b2b77a04SHong Zhang break; 60b2b77a04SHong Zhang case 2: 6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 62b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6358a912c5SAmlan Barua #else 6458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6558a912c5SAmlan Barua #endif 66b2b77a04SHong Zhang break; 67b2b77a04SHong Zhang case 3: 6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 69b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7058a912c5SAmlan Barua #else 7158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7258a912c5SAmlan Barua #endif 73b2b77a04SHong Zhang break; 74b2b77a04SHong Zhang default: 7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 76b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7758a912c5SAmlan Barua #else 7858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7958a912c5SAmlan Barua #endif 80b2b77a04SHong Zhang break; 81b2b77a04SHong Zhang } 82b2b77a04SHong Zhang fftw->finarray = x_array; 83b2b77a04SHong Zhang fftw->foutarray = y_array; 84b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 87b2b77a04SHong Zhang } else { /* use existing plan */ 88b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90b2b77a04SHong Zhang } else { 91b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 92b2b77a04SHong Zhang } 93b2b77a04SHong Zhang } 94b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96b2b77a04SHong Zhang PetscFunctionReturn(0); 97b2b77a04SHong Zhang } 98b2b77a04SHong Zhang 9997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 10097504071SAmlan Barua 10197504071SAmlan Barua Input parameter: 10297504071SAmlan Barua mat - the matrix 10397504071SAmlan Barua x - the vector on which BDFT will be performed 10497504071SAmlan Barua 10597504071SAmlan Barua Output parameter: 10697504071SAmlan Barua y - vector that stores result of BDFT 10797504071SAmlan Barua 10897504071SAmlan Barua */ 10997504071SAmlan Barua 110b2b77a04SHong Zhang #undef __FUNCT__ 111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113b2b77a04SHong Zhang { 114b2b77a04SHong Zhang PetscErrorCode ierr; 115b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 116b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 118b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 119b2b77a04SHong Zhang 120b2b77a04SHong Zhang PetscFunctionBegin; 121b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 124b2b77a04SHong Zhang switch (ndim){ 125b2b77a04SHong Zhang case 1: 12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 127b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12858a912c5SAmlan Barua #else 129e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13058a912c5SAmlan Barua #endif 131b2b77a04SHong Zhang break; 132b2b77a04SHong Zhang case 2: 13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13558a912c5SAmlan Barua #else 136e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13758a912c5SAmlan Barua #endif 138b2b77a04SHong Zhang break; 139b2b77a04SHong Zhang case 3: 14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 141b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14258a912c5SAmlan Barua #else 143e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 14458a912c5SAmlan Barua #endif 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang default: 14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14958a912c5SAmlan Barua #else 150e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 15158a912c5SAmlan Barua #endif 152b2b77a04SHong Zhang break; 153b2b77a04SHong Zhang } 154b2b77a04SHong Zhang fftw->binarray = x_array; 155b2b77a04SHong Zhang fftw->boutarray = y_array; 156b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157b2b77a04SHong Zhang } else { /* use existing plan */ 158b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160b2b77a04SHong Zhang } else { 161b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162b2b77a04SHong Zhang } 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166b2b77a04SHong Zhang PetscFunctionReturn(0); 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang 16997504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 17097504071SAmlan Barua 17197504071SAmlan Barua Input parameter: 17297504071SAmlan Barua mat - the matrix 17397504071SAmlan Barua x - the vector on which FDFT will be performed 17497504071SAmlan Barua 17597504071SAmlan Barua Output parameter: 17697504071SAmlan Barua y - vector that stores result of FDFT 17797504071SAmlan Barua 17897504071SAmlan Barua */ 17997504071SAmlan Barua 180b2b77a04SHong Zhang #undef __FUNCT__ 181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183b2b77a04SHong Zhang { 184b2b77a04SHong Zhang PetscErrorCode ierr; 185b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 186b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 188c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 189b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 190b2b77a04SHong Zhang 191b2b77a04SHong Zhang PetscFunctionBegin; 192b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 195b2b77a04SHong Zhang switch (ndim){ 196b2b77a04SHong Zhang case 1: 1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 198b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199ae0a50aaSHong Zhang #else 200ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2013c3a9c75SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang case 2: 20497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2063c3a9c75SAmlan Barua #else 2073c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2083c3a9c75SAmlan Barua #endif 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang case 3: 2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 212b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2133c3a9c75SAmlan Barua #else 21451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2153c3a9c75SAmlan Barua #endif 216b2b77a04SHong Zhang break; 217b2b77a04SHong Zhang default: 2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 219c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2203c3a9c75SAmlan Barua #else 2213c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2223c3a9c75SAmlan Barua #endif 223b2b77a04SHong Zhang break; 224b2b77a04SHong Zhang } 225b2b77a04SHong Zhang fftw->finarray = x_array; 226b2b77a04SHong Zhang fftw->foutarray = y_array; 227b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 230b2b77a04SHong Zhang } else { /* use existing plan */ 231b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233b2b77a04SHong Zhang } else { 234b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 235b2b77a04SHong Zhang } 236b2b77a04SHong Zhang } 237b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239b2b77a04SHong Zhang PetscFunctionReturn(0); 240b2b77a04SHong Zhang } 241b2b77a04SHong Zhang 24297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 24397504071SAmlan Barua 24497504071SAmlan Barua Input parameter: 24597504071SAmlan Barua mat - the matrix 24697504071SAmlan Barua x - the vector on which BDFT will be performed 24797504071SAmlan Barua 24897504071SAmlan Barua Output parameter: 24997504071SAmlan Barua y - vector that stores result of BDFT 25097504071SAmlan Barua 25197504071SAmlan Barua */ 252b2b77a04SHong Zhang #undef __FUNCT__ 253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255b2b77a04SHong Zhang { 256b2b77a04SHong Zhang PetscErrorCode ierr; 257b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 258b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 260c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 261b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 262b2b77a04SHong Zhang 263b2b77a04SHong Zhang PetscFunctionBegin; 264b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 267b2b77a04SHong Zhang switch (ndim){ 268b2b77a04SHong Zhang case 1: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271ae0a50aaSHong Zhang #else 272ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang case 2: 27697504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2783c3a9c75SAmlan Barua #else 2793c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2803c3a9c75SAmlan Barua #endif 281b2b77a04SHong Zhang break; 282b2b77a04SHong Zhang case 3: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2853c3a9c75SAmlan Barua #else 2863c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang default: 2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 291c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2923c3a9c75SAmlan Barua #else 2933c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2943c3a9c75SAmlan Barua #endif 295b2b77a04SHong Zhang break; 296b2b77a04SHong Zhang } 297b2b77a04SHong Zhang fftw->binarray = x_array; 298b2b77a04SHong Zhang fftw->boutarray = y_array; 299b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300b2b77a04SHong Zhang } else { /* use existing plan */ 301b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303b2b77a04SHong Zhang } else { 304b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305b2b77a04SHong Zhang } 306b2b77a04SHong Zhang } 307b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang PetscFunctionReturn(0); 310b2b77a04SHong Zhang } 311b2b77a04SHong Zhang 312b2b77a04SHong Zhang #undef __FUNCT__ 313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 315b2b77a04SHong Zhang { 316b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 317b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318b2b77a04SHong Zhang PetscErrorCode ierr; 319b2b77a04SHong Zhang 320b2b77a04SHong Zhang PetscFunctionBegin; 321b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 322b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 323b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 325b2b77a04SHong Zhang PetscFunctionReturn(0); 326b2b77a04SHong Zhang } 327b2b77a04SHong Zhang 328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329b2b77a04SHong Zhang #undef __FUNCT__ 330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332b2b77a04SHong Zhang { 333b2b77a04SHong Zhang PetscErrorCode ierr; 334b2b77a04SHong Zhang PetscScalar *array; 335b2b77a04SHong Zhang 336b2b77a04SHong Zhang PetscFunctionBegin; 337b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341b2b77a04SHong Zhang PetscFunctionReturn(0); 342b2b77a04SHong Zhang } 343b2b77a04SHong Zhang 3443c3a9c75SAmlan Barua 3454f7415efSAmlan Barua #undef __FUNCT__ 3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3474be45526SHong Zhang /*@ 348b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3494f7415efSAmlan Barua parallel layout determined by FFTW 3504f7415efSAmlan Barua 3514f7415efSAmlan Barua Collective on Mat 3524f7415efSAmlan Barua 3534f7415efSAmlan Barua Input Parameter: 3544f7415efSAmlan Barua . mat - the matrix 3554f7415efSAmlan Barua 3564f7415efSAmlan Barua Output Parameter: 3574f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3584f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 35997504071SAmlan Barua - bout - (optional) output vector of backward FFTW 3604f7415efSAmlan Barua 3614f7415efSAmlan Barua Level: advanced 36297504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 36397504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 36497504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 36597504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 36697504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 36797504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 36897504071SAmlan Barua last dimension from n to n/2+1 while invoking this routine. 3694f7415efSAmlan Barua 3704f7415efSAmlan Barua .seealso: MatCreateFFTW() 3714be45526SHong Zhang @*/ 3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3734be45526SHong Zhang { 3744be45526SHong Zhang PetscErrorCode ierr; 375b4c3921fSHong Zhang 3764be45526SHong Zhang PetscFunctionBegin; 3774be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3784be45526SHong Zhang PetscFunctionReturn(0); 3794be45526SHong Zhang } 3804be45526SHong Zhang 3814f7415efSAmlan Barua EXTERN_C_BEGIN 3824be45526SHong Zhang #undef __FUNCT__ 3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3844be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3854f7415efSAmlan Barua { 3864f7415efSAmlan Barua PetscErrorCode ierr; 3874f7415efSAmlan Barua PetscMPIInt size,rank; 3884f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3894f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3904f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3919496c9aeSAmlan Barua PetscInt N=fft->N; 3924f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3934f7415efSAmlan Barua 3944f7415efSAmlan Barua PetscFunctionBegin; 3954f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3964f7415efSAmlan Barua PetscValidType(A,1); 3974f7415efSAmlan Barua 3984f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 3994f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4004f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4024f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4034f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4044f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4054f7415efSAmlan Barua #else 4068ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4078ca4c034SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4088ca4c034SAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4094f7415efSAmlan Barua #endif 41097504071SAmlan Barua } else { /* parallel cases */ 4114f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4129496c9aeSAmlan Barua ptrdiff_t local_n1; 4139496c9aeSAmlan Barua fftw_complex *data_fout; 4149496c9aeSAmlan Barua ptrdiff_t local_1_start; 4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4169496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4179496c9aeSAmlan Barua #else 4184f7415efSAmlan Barua double *data_finr,*data_boutr; 4199496c9aeSAmlan Barua PetscInt n1,N1,vsize; 4209496c9aeSAmlan Barua ptrdiff_t temp; 4219496c9aeSAmlan Barua #endif 4229496c9aeSAmlan Barua 4234f7415efSAmlan Barua switch (ndim){ 4244f7415efSAmlan Barua case 1: 42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 42657625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42757625b84SAmlan Barua #else 42857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42957625b84SAmlan Barua if (fin) { 43057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 43157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43257625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43357625b84SAmlan Barua } 43457625b84SAmlan Barua if (fout) { 43557625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 43657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43757625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 43857625b84SAmlan Barua } 43957625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44057625b84SAmlan Barua if (bout) { 44157625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44257625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44357625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44457625b84SAmlan Barua } 44557625b84SAmlan Barua break; 44657625b84SAmlan Barua #endif 4474f7415efSAmlan Barua case 2: 44897504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4504f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4514f7415efSAmlan Barua if (fin) { 4524f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4534f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4544f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4554f7415efSAmlan Barua } 4564f7415efSAmlan Barua if (bout) { 4574f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4584f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4594f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4604f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4614f7415efSAmlan Barua } 462c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 46357625b84SAmlan Barua if (fout) { 46457625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4659496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 46757625b84SAmlan Barua } 4684f7415efSAmlan Barua #else 4694f7415efSAmlan Barua /* Get local size */ 4704f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4714f7415efSAmlan Barua if (fin) { 4724f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4734f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4744f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4754f7415efSAmlan Barua } 4764f7415efSAmlan Barua if (fout) { 4774f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4784f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua if (bout) { 4824f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4834f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4844f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4854f7415efSAmlan Barua } 4864f7415efSAmlan Barua #endif 4874f7415efSAmlan Barua break; 4884f7415efSAmlan Barua case 3: 4894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4904f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4914f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4924f7415efSAmlan Barua if (fin) { 4934f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4944f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4954f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4964f7415efSAmlan Barua } 4974f7415efSAmlan Barua if (bout) { 4984f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4994f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5004f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5014f7415efSAmlan Barua } 502c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 50357625b84SAmlan Barua if (fout) { 50457625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 50557625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 50657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50757625b84SAmlan Barua } 5084f7415efSAmlan Barua #else 5090c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5100c9b18e4SAmlan Barua if (fin) { 5110c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5120c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5130c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5140c9b18e4SAmlan Barua } 5150c9b18e4SAmlan Barua if (fout) { 5160c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5170c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5180c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5190c9b18e4SAmlan Barua } 5200c9b18e4SAmlan Barua if (bout) { 5210c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5220c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5230c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5240c9b18e4SAmlan Barua } 5254f7415efSAmlan Barua #endif 5264f7415efSAmlan Barua break; 5274f7415efSAmlan Barua default: 5284f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5294f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5304f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5314f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5324f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5334f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5344f7415efSAmlan Barua 5354f7415efSAmlan Barua if (fin) { 5364f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5374f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5384f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5394f7415efSAmlan Barua } 5404f7415efSAmlan Barua if (bout) { 5414f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5424f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5439496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5444f7415efSAmlan Barua } 545c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 546c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 547c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 54857625b84SAmlan Barua if (fout) { 54957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 55057625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55257625b84SAmlan Barua } 5534f7415efSAmlan Barua #else 5540c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5550c9b18e4SAmlan Barua if (fin) { 5560c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5570c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5580c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5590c9b18e4SAmlan Barua } 5600c9b18e4SAmlan Barua if (fout) { 5610c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5620c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5630c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5640c9b18e4SAmlan Barua } 5650c9b18e4SAmlan Barua if (bout) { 5660c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5670c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5680c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5690c9b18e4SAmlan Barua } 5704f7415efSAmlan Barua #endif 5714f7415efSAmlan Barua break; 5724f7415efSAmlan Barua } 5734f7415efSAmlan Barua } 5744f7415efSAmlan Barua PetscFunctionReturn(0); 5754f7415efSAmlan Barua } 5764f7415efSAmlan Barua EXTERN_C_END 5774f7415efSAmlan Barua 578c92418ccSAmlan Barua #undef __FUNCT__ 57974a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 58074a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 5813c3a9c75SAmlan Barua { 5823c3a9c75SAmlan Barua PetscErrorCode ierr; 5833c3a9c75SAmlan Barua PetscFunctionBegin; 58474a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 5853c3a9c75SAmlan Barua PetscFunctionReturn(0); 5863c3a9c75SAmlan Barua } 58754efbe56SHong Zhang 588b2b77a04SHong Zhang /* 58997504071SAmlan Barua VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 5903c3a9c75SAmlan Barua Input A, x, y 5913c3a9c75SAmlan Barua A - FFTW matrix 59254dd5118SAmlan Barua x - user data 593b2b77a04SHong Zhang Options Database Keys: 594b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 595b2b77a04SHong Zhang 596b2b77a04SHong Zhang Level: intermediate 59797504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 59897504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 59997504071SAmlan Barua zeros. This routine does that job by scattering operation. 60097504071SAmlan Barua 601b2b77a04SHong Zhang 602b2b77a04SHong Zhang */ 6033c3a9c75SAmlan Barua 6043c3a9c75SAmlan Barua EXTERN_C_BEGIN 6053c3a9c75SAmlan Barua #undef __FUNCT__ 60674a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 60774a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6083c3a9c75SAmlan Barua { 6093c3a9c75SAmlan Barua PetscErrorCode ierr; 6103c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6113c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6123c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6139496c9aeSAmlan Barua PetscInt N=fft->N; 614*b5d11533SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 6159496c9aeSAmlan Barua PetscInt low; 6168ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6173c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6189496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6193c3a9c75SAmlan Barua VecScatter vecscat; 6203c3a9c75SAmlan Barua IS list1,list2; 6219496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6229496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6239496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6249496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 6259496c9aeSAmlan Barua ptrdiff_t temp; 6269496c9aeSAmlan Barua #endif 6273c3a9c75SAmlan Barua 628f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 629f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6303c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 6313c3a9c75SAmlan Barua 632e81bb599SAmlan Barua if (size==1) 633e81bb599SAmlan Barua { 6348ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6358ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6369496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6376971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6386971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6396971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6406971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 641b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 642e81bb599SAmlan Barua } 643e81bb599SAmlan Barua 644e81bb599SAmlan Barua else{ 645e81bb599SAmlan Barua 6463c3a9c75SAmlan Barua switch (ndim){ 6473c3a9c75SAmlan Barua case 1: 64864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 64964657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 65064657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 65164657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 65264657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 65364657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65464657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65564657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 65664657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 65764657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 65864657d84SAmlan Barua #else 659e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 66064657d84SAmlan Barua #endif 6613c3a9c75SAmlan Barua break; 6623c3a9c75SAmlan Barua case 2: 663bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 664bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 665bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 666bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 667bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 668bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 670bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 671bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 672bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 673bd59e6c6SAmlan Barua #else 6743c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6753c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6769496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 6779496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 6789496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 6799496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 6803c3a9c75SAmlan Barua 6813c3a9c75SAmlan Barua if (dim[1]%2==0) 6823c3a9c75SAmlan Barua NM = dim[1]+2; 6833c3a9c75SAmlan Barua else 6843c3a9c75SAmlan Barua NM = dim[1]+1; 6853c3a9c75SAmlan Barua 6863c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 6873c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 6883c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 6893c3a9c75SAmlan Barua tempindx1 = i*NM + j; 6905b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 6913c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 6923c3a9c75SAmlan Barua } 6933c3a9c75SAmlan Barua } 6943c3a9c75SAmlan Barua 6953c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 6963c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 6973c3a9c75SAmlan Barua 698f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 699f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 700f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 701f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 702b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 703b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 704b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 705b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 706bd59e6c6SAmlan Barua #endif 7079496c9aeSAmlan Barua break; 7083c3a9c75SAmlan Barua 7093c3a9c75SAmlan Barua case 3: 710bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 711bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 712bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 713bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 714bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 715bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 717bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 718bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 719bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 720bd59e6c6SAmlan Barua #else 72151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 72251d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 72351d1eed7SAmlan Barua 7249496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7259496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 72651d1eed7SAmlan Barua 72751d1eed7SAmlan Barua if (dim[2]%2==0) 72851d1eed7SAmlan Barua NM = dim[2]+2; 72951d1eed7SAmlan Barua else 73051d1eed7SAmlan Barua NM = dim[2]+1; 73151d1eed7SAmlan Barua 73251d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 73351d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 73451d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 73551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 73651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 73751d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 73851d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 73951d1eed7SAmlan Barua } 74051d1eed7SAmlan Barua } 74151d1eed7SAmlan Barua } 74251d1eed7SAmlan Barua 74351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 74451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 74551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 74651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 749b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 750b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 751b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 752b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 753bd59e6c6SAmlan Barua #endif 7549496c9aeSAmlan Barua break; 7553c3a9c75SAmlan Barua 7563c3a9c75SAmlan Barua default: 757bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 758bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 759bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 760bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 761bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 762bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 764bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 765bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 766bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 767bd59e6c6SAmlan Barua #else 768e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 769e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 770e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 771e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 772e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 773e5d7f247SAmlan Barua 774e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 775e5d7f247SAmlan Barua 776e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 777e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 778e5d7f247SAmlan Barua 779e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 780ba6e06d0SAmlan Barua NM = 2; 781e5d7f247SAmlan Barua else 782ba6e06d0SAmlan Barua NM = 1; 783e5d7f247SAmlan Barua 7846971673cSAmlan Barua j = low; 7856971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 7866971673cSAmlan Barua { 7876971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 7886971673cSAmlan Barua indx2[i] = j; 7896971673cSAmlan Barua if (k%dim[ndim-1]==0) 7906971673cSAmlan Barua { j+=NM;} 7916971673cSAmlan Barua j++; 7926971673cSAmlan Barua } 7936971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7946971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7956971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 7966971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7976971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7986971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 799b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 800b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 801b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 802b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 803bd59e6c6SAmlan Barua #endif 8049496c9aeSAmlan Barua break; 8053c3a9c75SAmlan Barua } 806e81bb599SAmlan Barua } 8071abc6020SAmlan Barua return(0); 8083c3a9c75SAmlan Barua } 8093c3a9c75SAmlan Barua EXTERN_C_END 8103c3a9c75SAmlan Barua 8113c3a9c75SAmlan Barua #undef __FUNCT__ 81274a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 81374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8143c3a9c75SAmlan Barua { 8153c3a9c75SAmlan Barua PetscErrorCode ierr; 8163c3a9c75SAmlan Barua PetscFunctionBegin; 81774a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8183c3a9c75SAmlan Barua PetscFunctionReturn(0); 8193c3a9c75SAmlan Barua } 82054efbe56SHong Zhang 8215b3e41ffSAmlan Barua /* 82297504071SAmlan Barua VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 8235b3e41ffSAmlan Barua Input A, x, y 8245b3e41ffSAmlan Barua A - FFTW matrix 8255b3e41ffSAmlan Barua x - FFTW vector 8265b3e41ffSAmlan Barua y - PETSc vector that user can read 8275b3e41ffSAmlan Barua 8285b3e41ffSAmlan Barua Level: intermediate 82997504071SAmlan Barua Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 83097504071SAmlan Barua VecScatterFFTWToPetsc removes those extra zeros. 8315b3e41ffSAmlan Barua 8323c3a9c75SAmlan Barua */ 8333c3a9c75SAmlan Barua 8343c3a9c75SAmlan Barua EXTERN_C_BEGIN 8353c3a9c75SAmlan Barua #undef __FUNCT__ 83674a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 83774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8385b3e41ffSAmlan Barua { 8395b3e41ffSAmlan Barua PetscErrorCode ierr; 8405b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8415b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8425b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8439496c9aeSAmlan Barua PetscInt N=fft->N; 844b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 8459496c9aeSAmlan Barua PetscInt low; 8469496c9aeSAmlan Barua PetscInt rank,size; 8475b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8489496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8495b3e41ffSAmlan Barua VecScatter vecscat; 8505b3e41ffSAmlan Barua IS list1,list2; 8519496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8529496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8539496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8549496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8559496c9aeSAmlan Barua ptrdiff_t temp; 8569496c9aeSAmlan Barua #endif 8579496c9aeSAmlan Barua 8585b3e41ffSAmlan Barua 8595b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8605b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 861b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 8625b3e41ffSAmlan Barua 863e81bb599SAmlan Barua if (size==1){ 864b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 8656971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8666971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8676971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8686971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 869b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 870e81bb599SAmlan Barua } 871e81bb599SAmlan Barua else{ 872e81bb599SAmlan Barua 873e81bb599SAmlan Barua switch (ndim){ 874e81bb599SAmlan Barua case 1: 87564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87664657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 8779496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 8789496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 87964657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 88064657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88164657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88264657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 88364657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 88464657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 88564657d84SAmlan Barua #else 8866a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 88764657d84SAmlan Barua #endif 8885b3e41ffSAmlan Barua break; 8895b3e41ffSAmlan Barua case 2: 890bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 891bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 892bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 893bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 894bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 895bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 898bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 899bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 900bd59e6c6SAmlan Barua #else 9015b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9025b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9035b3e41ffSAmlan Barua 9049496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9059496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9065b3e41ffSAmlan Barua 9075b3e41ffSAmlan Barua if (dim[1]%2==0) 9085b3e41ffSAmlan Barua NM = dim[1]+2; 9095b3e41ffSAmlan Barua else 9105b3e41ffSAmlan Barua NM = dim[1]+1; 9115b3e41ffSAmlan Barua 9125b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 9135b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 9145b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9155b3e41ffSAmlan Barua tempindx1 = i*NM + j; 9165b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9175b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9185b3e41ffSAmlan Barua } 9195b3e41ffSAmlan Barua } 9205b3e41ffSAmlan Barua 9215b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9225b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9235b3e41ffSAmlan Barua 9245b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9255b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9265b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9275b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 928b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 929b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 930b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 931b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 932bd59e6c6SAmlan Barua #endif 9339496c9aeSAmlan Barua break; 9345b3e41ffSAmlan Barua case 3: 935bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 936bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 937bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 938bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 939bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 940bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 943bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 944bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 945bd59e6c6SAmlan Barua #else 946bd59e6c6SAmlan Barua 94751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 94851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 94951d1eed7SAmlan Barua 9509496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9519496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9539496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 95451d1eed7SAmlan Barua 95551d1eed7SAmlan Barua if (dim[2]%2==0) 95651d1eed7SAmlan Barua NM = dim[2]+2; 95751d1eed7SAmlan Barua else 95851d1eed7SAmlan Barua NM = dim[2]+1; 95951d1eed7SAmlan Barua 96051d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 96151d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 96251d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 96351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 96451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 96551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 96651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 96751d1eed7SAmlan Barua } 96851d1eed7SAmlan Barua } 96951d1eed7SAmlan Barua } 97051d1eed7SAmlan Barua 97151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 97251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 97351d1eed7SAmlan Barua 97451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 97551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 978b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 979b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 980b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 981b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 982bd59e6c6SAmlan Barua #endif 9839496c9aeSAmlan Barua break; 9845b3e41ffSAmlan Barua default: 985bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 986bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 987bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 988bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 989bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 994bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua #else 996ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 997ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 998ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 999ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1000ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1001ba6e06d0SAmlan Barua 1002ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1003ba6e06d0SAmlan Barua 1004ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1005ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1006ba6e06d0SAmlan Barua 1007ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1008ba6e06d0SAmlan Barua NM = 2; 1009ba6e06d0SAmlan Barua else 1010ba6e06d0SAmlan Barua NM = 1; 1011ba6e06d0SAmlan Barua 1012ba6e06d0SAmlan Barua j = low; 1013ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1014ba6e06d0SAmlan Barua { 1015ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1016ba6e06d0SAmlan Barua indx2[i] = j; 1017ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1018ba6e06d0SAmlan Barua { j+=NM;} 1019ba6e06d0SAmlan Barua j++; 1020ba6e06d0SAmlan Barua } 1021ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1022ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1023ba6e06d0SAmlan Barua 1024ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1025ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1031b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1032bd59e6c6SAmlan Barua #endif 10339496c9aeSAmlan Barua break; 10345b3e41ffSAmlan Barua } 1035e81bb599SAmlan Barua } 10365b3e41ffSAmlan Barua return 0; 10375b3e41ffSAmlan Barua } 10385b3e41ffSAmlan Barua EXTERN_C_END 10395b3e41ffSAmlan Barua 10405b3e41ffSAmlan Barua EXTERN_C_BEGIN 10415b3e41ffSAmlan Barua #undef __FUNCT__ 10423c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10433c3a9c75SAmlan Barua /* 10443c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 10453c3a9c75SAmlan Barua via the external package FFTW 10463c3a9c75SAmlan Barua Options Database Keys: 10473c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10483c3a9c75SAmlan Barua 10493c3a9c75SAmlan Barua Level: intermediate 10503c3a9c75SAmlan Barua 10513c3a9c75SAmlan Barua */ 10523c3a9c75SAmlan Barua 1053b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1054b2b77a04SHong Zhang { 1055b2b77a04SHong Zhang PetscErrorCode ierr; 1056b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1057b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1058b2b77a04SHong Zhang Mat_FFTW *fftw; 1059b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1060b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1061b2b77a04SHong Zhang PetscBool flg; 10624f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 106311902ff2SHong Zhang PetscMPIInt size,rank; 10649496c9aeSAmlan Barua ptrdiff_t *pdim; 10659496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10669496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10679496c9aeSAmlan Barua ptrdiff_t temp; 10688ca4c034SAmlan Barua PetscInt N1,tot_dim; 10694f48bc67SAmlan Barua #else 10704f48bc67SAmlan Barua PetscInt n1; 10719496c9aeSAmlan Barua #endif 10729496c9aeSAmlan Barua 1073b2b77a04SHong Zhang PetscFunctionBegin; 1074b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 107511902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1076c92418ccSAmlan Barua 107711902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 107811902ff2SHong Zhang pdim[0] = dim[0]; 10798ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10808ca4c034SAmlan Barua tot_dim = 2*dim[0]; 10818ca4c034SAmlan Barua #endif 108211902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 108311902ff2SHong Zhang { 10846882372aSHong Zhang partial_dim *= dim[ctr]; 108511902ff2SHong Zhang pdim[ctr] = dim[ctr]; 10868ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10878ca4c034SAmlan Barua if (ctr==ndim-1) 10888ca4c034SAmlan Barua tot_dim *= (dim[ctr]/2+1); 10898ca4c034SAmlan Barua else 10908ca4c034SAmlan Barua tot_dim *= dim[ctr]; 10918ca4c034SAmlan Barua #endif 10926882372aSHong Zhang } 10933c3a9c75SAmlan Barua 1094b2b77a04SHong Zhang if (size == 1) { 1095e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1096b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1097b2b77a04SHong Zhang n = N; 1098e81bb599SAmlan Barua #else 1099e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1100e81bb599SAmlan Barua n = tot_dim; 1101e81bb599SAmlan Barua #endif 1102e81bb599SAmlan Barua 1103b2b77a04SHong Zhang } else { 11041abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1105b2b77a04SHong Zhang switch (ndim){ 1106b2b77a04SHong Zhang case 1: 11073c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11083c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1109e5d7f247SAmlan Barua #else 11109496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 11116882372aSHong Zhang n = (PetscInt)local_n0; 11129496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 11139496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1114e5d7f247SAmlan Barua #endif 1115b2b77a04SHong Zhang break; 1116b2b77a04SHong Zhang case 2: 11175b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1118b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1119b2b77a04SHong Zhang /* 1120b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1121b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1122b2b77a04SHong Zhang */ 1123b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1124b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11255b3e41ffSAmlan Barua #else 11265b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 11275b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1128c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11295b3e41ffSAmlan Barua #endif 1130b2b77a04SHong Zhang break; 1131b2b77a04SHong Zhang case 3: 113251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 113351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 11346882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11356882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 113651d1eed7SAmlan Barua #else 113751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 113851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1139c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 114051d1eed7SAmlan Barua #endif 1141b2b77a04SHong Zhang break; 1142b2b77a04SHong Zhang default: 1143b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114411902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 11456882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11466882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1147b3a17365SAmlan Barua #else 1148b3a17365SAmlan Barua temp = pdim[ndim-1]; 1149b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1150b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1151b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1152b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1153b3a17365SAmlan Barua pdim[ndim-1] = temp; 1154c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1155b3a17365SAmlan Barua #endif 1156b2b77a04SHong Zhang break; 1157b2b77a04SHong Zhang } 1158b2b77a04SHong Zhang } 1159b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1160b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1161b2b77a04SHong Zhang fft->data = (void*)fftw; 1162b2b77a04SHong Zhang 1163b2b77a04SHong Zhang fft->n = n; 1164c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1165e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1166c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1167b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1168c92418ccSAmlan Barua 1169b2b77a04SHong Zhang fftw->p_forward = 0; 1170b2b77a04SHong Zhang fftw->p_backward = 0; 1171b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1172b2b77a04SHong Zhang 1173b2b77a04SHong Zhang if (size == 1){ 1174b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1175b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1176b2b77a04SHong Zhang } else { 1177b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1178b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1179b2b77a04SHong Zhang } 1180b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1181b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 11824be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 118374a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 118474a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1185b2b77a04SHong Zhang 1186b2b77a04SHong Zhang /* get runtime options */ 1187b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1188b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1189b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1190b2b77a04SHong Zhang PetscOptionsEnd(); 1191b2b77a04SHong Zhang PetscFunctionReturn(0); 1192b2b77a04SHong Zhang } 1193b2b77a04SHong Zhang EXTERN_C_END 11943c3a9c75SAmlan Barua 11953c3a9c75SAmlan Barua 11963c3a9c75SAmlan Barua 11973c3a9c75SAmlan Barua 1198