1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua 3097504071SAmlan Barua Input parameter: 3197504071SAmlan Barua mat - the matrix 3297504071SAmlan Barua x - the vector on which FDFT will be performed 3397504071SAmlan Barua 3497504071SAmlan Barua Output parameter: 3597504071SAmlan Barua y - vector that stores result of FDFT 3697504071SAmlan Barua 3797504071SAmlan Barua */ 38b2b77a04SHong Zhang #undef __FUNCT__ 39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41b2b77a04SHong Zhang { 42b2b77a04SHong Zhang PetscErrorCode ierr; 43b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 44b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 46b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 47b2b77a04SHong Zhang 48b2b77a04SHong Zhang PetscFunctionBegin; 49b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 52b2b77a04SHong Zhang switch (ndim){ 53b2b77a04SHong Zhang case 1: 5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5658a912c5SAmlan Barua #else 5758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5858a912c5SAmlan Barua #endif 59b2b77a04SHong Zhang break; 60b2b77a04SHong Zhang case 2: 6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 62b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6358a912c5SAmlan Barua #else 6458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6558a912c5SAmlan Barua #endif 66b2b77a04SHong Zhang break; 67b2b77a04SHong Zhang case 3: 6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 69b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7058a912c5SAmlan Barua #else 7158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7258a912c5SAmlan Barua #endif 73b2b77a04SHong Zhang break; 74b2b77a04SHong Zhang default: 7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 76b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7758a912c5SAmlan Barua #else 7858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7958a912c5SAmlan Barua #endif 80b2b77a04SHong Zhang break; 81b2b77a04SHong Zhang } 82b2b77a04SHong Zhang fftw->finarray = x_array; 83b2b77a04SHong Zhang fftw->foutarray = y_array; 84b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 87b2b77a04SHong Zhang } else { /* use existing plan */ 88b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90b2b77a04SHong Zhang } else { 91b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 92b2b77a04SHong Zhang } 93b2b77a04SHong Zhang } 94b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96b2b77a04SHong Zhang PetscFunctionReturn(0); 97b2b77a04SHong Zhang } 98b2b77a04SHong Zhang 9997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 10097504071SAmlan Barua 10197504071SAmlan Barua Input parameter: 10297504071SAmlan Barua mat - the matrix 10397504071SAmlan Barua x - the vector on which BDFT will be performed 10497504071SAmlan Barua 10597504071SAmlan Barua Output parameter: 10697504071SAmlan Barua y - vector that stores result of BDFT 10797504071SAmlan Barua 10897504071SAmlan Barua */ 10997504071SAmlan Barua 110b2b77a04SHong Zhang #undef __FUNCT__ 111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113b2b77a04SHong Zhang { 114b2b77a04SHong Zhang PetscErrorCode ierr; 115b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 116b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 118b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 119b2b77a04SHong Zhang 120b2b77a04SHong Zhang PetscFunctionBegin; 121b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 124b2b77a04SHong Zhang switch (ndim){ 125b2b77a04SHong Zhang case 1: 12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 127b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12858a912c5SAmlan Barua #else 129e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13058a912c5SAmlan Barua #endif 131b2b77a04SHong Zhang break; 132b2b77a04SHong Zhang case 2: 13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13558a912c5SAmlan Barua #else 136e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13758a912c5SAmlan Barua #endif 138b2b77a04SHong Zhang break; 139b2b77a04SHong Zhang case 3: 14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 141b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14258a912c5SAmlan Barua #else 143e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 14458a912c5SAmlan Barua #endif 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang default: 14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14958a912c5SAmlan Barua #else 150e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 15158a912c5SAmlan Barua #endif 152b2b77a04SHong Zhang break; 153b2b77a04SHong Zhang } 154b2b77a04SHong Zhang fftw->binarray = x_array; 155b2b77a04SHong Zhang fftw->boutarray = y_array; 156b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157b2b77a04SHong Zhang } else { /* use existing plan */ 158b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160b2b77a04SHong Zhang } else { 161b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162b2b77a04SHong Zhang } 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166b2b77a04SHong Zhang PetscFunctionReturn(0); 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang 16997504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 17097504071SAmlan Barua 17197504071SAmlan Barua Input parameter: 17297504071SAmlan Barua mat - the matrix 17397504071SAmlan Barua x - the vector on which FDFT will be performed 17497504071SAmlan Barua 17597504071SAmlan Barua Output parameter: 17697504071SAmlan Barua y - vector that stores result of FDFT 17797504071SAmlan Barua 17897504071SAmlan Barua */ 17997504071SAmlan Barua 180b2b77a04SHong Zhang #undef __FUNCT__ 181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183b2b77a04SHong Zhang { 184b2b77a04SHong Zhang PetscErrorCode ierr; 185b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 186b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 188c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 189b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 190b2b77a04SHong Zhang 191b2b77a04SHong Zhang PetscFunctionBegin; 192b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 195b2b77a04SHong Zhang switch (ndim){ 196b2b77a04SHong Zhang case 1: 1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 198b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199ae0a50aaSHong Zhang #else 200ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2013c3a9c75SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang case 2: 20497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2063c3a9c75SAmlan Barua #else 2073c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2083c3a9c75SAmlan Barua #endif 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang case 3: 2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 212b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2133c3a9c75SAmlan Barua #else 21451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2153c3a9c75SAmlan Barua #endif 216b2b77a04SHong Zhang break; 217b2b77a04SHong Zhang default: 2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 219c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2203c3a9c75SAmlan Barua #else 2213c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2223c3a9c75SAmlan Barua #endif 223b2b77a04SHong Zhang break; 224b2b77a04SHong Zhang } 225b2b77a04SHong Zhang fftw->finarray = x_array; 226b2b77a04SHong Zhang fftw->foutarray = y_array; 227b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 230b2b77a04SHong Zhang } else { /* use existing plan */ 231b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233b2b77a04SHong Zhang } else { 234b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 235b2b77a04SHong Zhang } 236b2b77a04SHong Zhang } 237b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239b2b77a04SHong Zhang PetscFunctionReturn(0); 240b2b77a04SHong Zhang } 241b2b77a04SHong Zhang 24297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 24397504071SAmlan Barua 24497504071SAmlan Barua Input parameter: 24597504071SAmlan Barua mat - the matrix 24697504071SAmlan Barua x - the vector on which BDFT will be performed 24797504071SAmlan Barua 24897504071SAmlan Barua Output parameter: 24997504071SAmlan Barua y - vector that stores result of BDFT 25097504071SAmlan Barua 25197504071SAmlan Barua */ 252b2b77a04SHong Zhang #undef __FUNCT__ 253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255b2b77a04SHong Zhang { 256b2b77a04SHong Zhang PetscErrorCode ierr; 257b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 258b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 260c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 261b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 262b2b77a04SHong Zhang 263b2b77a04SHong Zhang PetscFunctionBegin; 264b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 267b2b77a04SHong Zhang switch (ndim){ 268b2b77a04SHong Zhang case 1: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271ae0a50aaSHong Zhang #else 272ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang case 2: 27697504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2783c3a9c75SAmlan Barua #else 2793c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2803c3a9c75SAmlan Barua #endif 281b2b77a04SHong Zhang break; 282b2b77a04SHong Zhang case 3: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2853c3a9c75SAmlan Barua #else 2863c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang default: 2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 291c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2923c3a9c75SAmlan Barua #else 2933c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2943c3a9c75SAmlan Barua #endif 295b2b77a04SHong Zhang break; 296b2b77a04SHong Zhang } 297b2b77a04SHong Zhang fftw->binarray = x_array; 298b2b77a04SHong Zhang fftw->boutarray = y_array; 299b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300b2b77a04SHong Zhang } else { /* use existing plan */ 301b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303b2b77a04SHong Zhang } else { 304b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305b2b77a04SHong Zhang } 306b2b77a04SHong Zhang } 307b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang PetscFunctionReturn(0); 310b2b77a04SHong Zhang } 311b2b77a04SHong Zhang 312b2b77a04SHong Zhang #undef __FUNCT__ 313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 315b2b77a04SHong Zhang { 316b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 317b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318b2b77a04SHong Zhang PetscErrorCode ierr; 319b2b77a04SHong Zhang 320b2b77a04SHong Zhang PetscFunctionBegin; 321b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 322b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 323b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 325b2b77a04SHong Zhang PetscFunctionReturn(0); 326b2b77a04SHong Zhang } 327b2b77a04SHong Zhang 328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329b2b77a04SHong Zhang #undef __FUNCT__ 330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332b2b77a04SHong Zhang { 333b2b77a04SHong Zhang PetscErrorCode ierr; 334b2b77a04SHong Zhang PetscScalar *array; 335b2b77a04SHong Zhang 336b2b77a04SHong Zhang PetscFunctionBegin; 337b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341b2b77a04SHong Zhang PetscFunctionReturn(0); 342b2b77a04SHong Zhang } 343b2b77a04SHong Zhang 3443c3a9c75SAmlan Barua 3454f7415efSAmlan Barua #undef __FUNCT__ 3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3474be45526SHong Zhang /*@ 348b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3494f7415efSAmlan Barua parallel layout determined by FFTW 3504f7415efSAmlan Barua 3514f7415efSAmlan Barua Collective on Mat 3524f7415efSAmlan Barua 3534f7415efSAmlan Barua Input Parameter: 3544f7415efSAmlan Barua . mat - the matrix 3554f7415efSAmlan Barua 3564f7415efSAmlan Barua Output Parameter: 3574f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3584f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 35997504071SAmlan Barua - bout - (optional) output vector of backward FFTW 3604f7415efSAmlan Barua 3614f7415efSAmlan Barua Level: advanced 36297504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 36397504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 36497504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 36597504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 36697504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 36797504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368*e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 369*e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 370*e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 371*e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 372*e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 373*e0ec536eSAmlan Barua each processor and returns that. 3744f7415efSAmlan Barua 3754f7415efSAmlan Barua .seealso: MatCreateFFTW() 3764be45526SHong Zhang @*/ 3774be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3784be45526SHong Zhang { 3794be45526SHong Zhang PetscErrorCode ierr; 380b4c3921fSHong Zhang 3814be45526SHong Zhang PetscFunctionBegin; 3824be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3834be45526SHong Zhang PetscFunctionReturn(0); 3844be45526SHong Zhang } 3854be45526SHong Zhang 3864f7415efSAmlan Barua EXTERN_C_BEGIN 3874be45526SHong Zhang #undef __FUNCT__ 3884be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3894be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3904f7415efSAmlan Barua { 3914f7415efSAmlan Barua PetscErrorCode ierr; 3924f7415efSAmlan Barua PetscMPIInt size,rank; 3934f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3944f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3954f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3969496c9aeSAmlan Barua PetscInt N=fft->N; 3974f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3984f7415efSAmlan Barua 3994f7415efSAmlan Barua PetscFunctionBegin; 4004f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4014f7415efSAmlan Barua PetscValidType(A,1); 4024f7415efSAmlan Barua 4034f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4044f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4054f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4064f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4074f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4084f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4094f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4104f7415efSAmlan Barua #else 4118ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4128ca4c034SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4138ca4c034SAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4144f7415efSAmlan Barua #endif 41597504071SAmlan Barua } else { /* parallel cases */ 4164f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4179496c9aeSAmlan Barua ptrdiff_t local_n1; 4189496c9aeSAmlan Barua fftw_complex *data_fout; 4199496c9aeSAmlan Barua ptrdiff_t local_1_start; 4209496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4219496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4229496c9aeSAmlan Barua #else 4234f7415efSAmlan Barua double *data_finr,*data_boutr; 4249496c9aeSAmlan Barua PetscInt n1,N1,vsize; 4259496c9aeSAmlan Barua ptrdiff_t temp; 4269496c9aeSAmlan Barua #endif 4279496c9aeSAmlan Barua 4284f7415efSAmlan Barua switch (ndim){ 4294f7415efSAmlan Barua case 1: 43057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 43157625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 43257625b84SAmlan Barua #else 43357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 43457625b84SAmlan Barua if (fin) { 43557625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 43657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43757625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43857625b84SAmlan Barua } 43957625b84SAmlan Barua if (fout) { 44057625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 44257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 44357625b84SAmlan Barua } 44457625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44557625b84SAmlan Barua if (bout) { 44657625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44757625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44857625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44957625b84SAmlan Barua } 45057625b84SAmlan Barua break; 45157625b84SAmlan Barua #endif 4524f7415efSAmlan Barua case 2: 45397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4544f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4554f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4564f7415efSAmlan Barua if (fin) { 4574f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4584f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4594f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4604f7415efSAmlan Barua } 4614f7415efSAmlan Barua if (bout) { 4624f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4634f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4644f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4654f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4664f7415efSAmlan Barua } 467c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 46857625b84SAmlan Barua if (fout) { 46957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4709496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 47157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 47257625b84SAmlan Barua } 4734f7415efSAmlan Barua #else 4744f7415efSAmlan Barua /* Get local size */ 4754f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4764f7415efSAmlan Barua if (fin) { 4774f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4784f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua if (fout) { 4824f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4834f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4844f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4854f7415efSAmlan Barua } 4864f7415efSAmlan Barua if (bout) { 4874f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4884f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4894f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4904f7415efSAmlan Barua } 4914f7415efSAmlan Barua #endif 4924f7415efSAmlan Barua break; 4934f7415efSAmlan Barua case 3: 4944f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4954f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4964f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4974f7415efSAmlan Barua if (fin) { 4984f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4994f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5004f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5014f7415efSAmlan Barua } 5024f7415efSAmlan Barua if (bout) { 5034f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5044f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5054f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5064f7415efSAmlan Barua } 507c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 50857625b84SAmlan Barua if (fout) { 50957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 51057625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51257625b84SAmlan Barua } 5134f7415efSAmlan Barua #else 5140c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5150c9b18e4SAmlan Barua if (fin) { 5160c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5170c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5180c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5190c9b18e4SAmlan Barua } 5200c9b18e4SAmlan Barua if (fout) { 5210c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5220c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5230c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5240c9b18e4SAmlan Barua } 5250c9b18e4SAmlan Barua if (bout) { 5260c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5270c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5280c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5290c9b18e4SAmlan Barua } 5304f7415efSAmlan Barua #endif 5314f7415efSAmlan Barua break; 5324f7415efSAmlan Barua default: 5334f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5344f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5354f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5364f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5374f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5384f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5394f7415efSAmlan Barua 5404f7415efSAmlan Barua if (fin) { 5414f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5424f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5434f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5444f7415efSAmlan Barua } 5454f7415efSAmlan Barua if (bout) { 5464f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5474f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5489496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5494f7415efSAmlan Barua } 550c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 551c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 552c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 55357625b84SAmlan Barua if (fout) { 55457625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 55557625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55757625b84SAmlan Barua } 5584f7415efSAmlan Barua #else 5590c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5600c9b18e4SAmlan Barua if (fin) { 5610c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5620c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5630c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5640c9b18e4SAmlan Barua } 5650c9b18e4SAmlan Barua if (fout) { 5660c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5670c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5680c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5690c9b18e4SAmlan Barua } 5700c9b18e4SAmlan Barua if (bout) { 5710c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5720c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5730c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5740c9b18e4SAmlan Barua } 5754f7415efSAmlan Barua #endif 5764f7415efSAmlan Barua break; 5774f7415efSAmlan Barua } 5784f7415efSAmlan Barua } 5794f7415efSAmlan Barua PetscFunctionReturn(0); 5804f7415efSAmlan Barua } 5814f7415efSAmlan Barua EXTERN_C_END 5824f7415efSAmlan Barua 583c92418ccSAmlan Barua #undef __FUNCT__ 58474a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 58574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 5863c3a9c75SAmlan Barua { 5873c3a9c75SAmlan Barua PetscErrorCode ierr; 5883c3a9c75SAmlan Barua PetscFunctionBegin; 58974a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 5903c3a9c75SAmlan Barua PetscFunctionReturn(0); 5913c3a9c75SAmlan Barua } 59254efbe56SHong Zhang 593b2b77a04SHong Zhang /* 59497504071SAmlan Barua VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 5953c3a9c75SAmlan Barua Input A, x, y 5963c3a9c75SAmlan Barua A - FFTW matrix 59754dd5118SAmlan Barua x - user data 598b2b77a04SHong Zhang Options Database Keys: 599b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 600b2b77a04SHong Zhang 601b2b77a04SHong Zhang Level: intermediate 60297504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 60397504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 60497504071SAmlan Barua zeros. This routine does that job by scattering operation. 60597504071SAmlan Barua 606b2b77a04SHong Zhang 607b2b77a04SHong Zhang */ 6083c3a9c75SAmlan Barua 6093c3a9c75SAmlan Barua EXTERN_C_BEGIN 6103c3a9c75SAmlan Barua #undef __FUNCT__ 61174a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 61274a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6133c3a9c75SAmlan Barua { 6143c3a9c75SAmlan Barua PetscErrorCode ierr; 6153c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6163c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6173c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6189496c9aeSAmlan Barua PetscInt N=fft->N; 619b5d11533SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 6209496c9aeSAmlan Barua PetscInt low; 6218ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6223c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6239496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6243c3a9c75SAmlan Barua VecScatter vecscat; 6253c3a9c75SAmlan Barua IS list1,list2; 6269496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6279496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6289496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6299496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 6309496c9aeSAmlan Barua ptrdiff_t temp; 6319496c9aeSAmlan Barua #endif 6323c3a9c75SAmlan Barua 633f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 634f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6353c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 6363c3a9c75SAmlan Barua 637e81bb599SAmlan Barua if (size==1) 638e81bb599SAmlan Barua { 6398ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6408ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6419496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6426971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6436971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6446971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6456971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 646b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 647e81bb599SAmlan Barua } 648e81bb599SAmlan Barua 649e81bb599SAmlan Barua else{ 650e81bb599SAmlan Barua 6513c3a9c75SAmlan Barua switch (ndim){ 6523c3a9c75SAmlan Barua case 1: 65364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 65464657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 65564657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 65664657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 65764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 65864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 66064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 66164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 66264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 66364657d84SAmlan Barua #else 664e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 66564657d84SAmlan Barua #endif 6663c3a9c75SAmlan Barua break; 6673c3a9c75SAmlan Barua case 2: 668bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 669bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 670bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 671bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 672bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 673bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 676bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 677bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 678bd59e6c6SAmlan Barua #else 6793c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6803c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6819496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 6829496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 6839496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 6849496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 6853c3a9c75SAmlan Barua 6863c3a9c75SAmlan Barua if (dim[1]%2==0) 6873c3a9c75SAmlan Barua NM = dim[1]+2; 6883c3a9c75SAmlan Barua else 6893c3a9c75SAmlan Barua NM = dim[1]+1; 6903c3a9c75SAmlan Barua 6913c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 6923c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 6933c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 6943c3a9c75SAmlan Barua tempindx1 = i*NM + j; 6955b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 6963c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 6973c3a9c75SAmlan Barua } 6983c3a9c75SAmlan Barua } 6993c3a9c75SAmlan Barua 7003c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7013c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7023c3a9c75SAmlan Barua 703f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 704f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 705f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 706f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 707b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 708b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 709b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 710b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 711bd59e6c6SAmlan Barua #endif 7129496c9aeSAmlan Barua break; 7133c3a9c75SAmlan Barua 7143c3a9c75SAmlan Barua case 3: 715bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 716bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 717bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 718bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 719bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 720bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 721bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 723bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 724bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 725bd59e6c6SAmlan Barua #else 72651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 72751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 72851d1eed7SAmlan Barua 7299496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7309496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 73151d1eed7SAmlan Barua 73251d1eed7SAmlan Barua if (dim[2]%2==0) 73351d1eed7SAmlan Barua NM = dim[2]+2; 73451d1eed7SAmlan Barua else 73551d1eed7SAmlan Barua NM = dim[2]+1; 73651d1eed7SAmlan Barua 73751d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 73851d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 73951d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 74051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 74151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 74251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 74351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 74451d1eed7SAmlan Barua } 74551d1eed7SAmlan Barua } 74651d1eed7SAmlan Barua } 74751d1eed7SAmlan Barua 74851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 74951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 75051d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 75151d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 75251d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 75351d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 754b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 755b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 756b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 757b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 758bd59e6c6SAmlan Barua #endif 7599496c9aeSAmlan Barua break; 7603c3a9c75SAmlan Barua 7613c3a9c75SAmlan Barua default: 762bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 763bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 764bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 765bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 766bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 767bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 768bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 770bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 771bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 772bd59e6c6SAmlan Barua #else 773e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 774e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 775e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 776e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 777e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 778e5d7f247SAmlan Barua 779e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 780e5d7f247SAmlan Barua 781e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 782e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 783e5d7f247SAmlan Barua 784e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 785ba6e06d0SAmlan Barua NM = 2; 786e5d7f247SAmlan Barua else 787ba6e06d0SAmlan Barua NM = 1; 788e5d7f247SAmlan Barua 7896971673cSAmlan Barua j = low; 7906971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 7916971673cSAmlan Barua { 7926971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 7936971673cSAmlan Barua indx2[i] = j; 7946971673cSAmlan Barua if (k%dim[ndim-1]==0) 7956971673cSAmlan Barua { j+=NM;} 7966971673cSAmlan Barua j++; 7976971673cSAmlan Barua } 7986971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7996971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8006971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8016971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8026971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8036971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 804b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 805b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 806b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 807b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 808bd59e6c6SAmlan Barua #endif 8099496c9aeSAmlan Barua break; 8103c3a9c75SAmlan Barua } 811e81bb599SAmlan Barua } 8121abc6020SAmlan Barua return(0); 8133c3a9c75SAmlan Barua } 8143c3a9c75SAmlan Barua EXTERN_C_END 8153c3a9c75SAmlan Barua 8163c3a9c75SAmlan Barua #undef __FUNCT__ 81774a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 81874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8193c3a9c75SAmlan Barua { 8203c3a9c75SAmlan Barua PetscErrorCode ierr; 8213c3a9c75SAmlan Barua PetscFunctionBegin; 82274a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8233c3a9c75SAmlan Barua PetscFunctionReturn(0); 8243c3a9c75SAmlan Barua } 82554efbe56SHong Zhang 8265b3e41ffSAmlan Barua /* 82797504071SAmlan Barua VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 8285b3e41ffSAmlan Barua Input A, x, y 8295b3e41ffSAmlan Barua A - FFTW matrix 8305b3e41ffSAmlan Barua x - FFTW vector 8315b3e41ffSAmlan Barua y - PETSc vector that user can read 8325b3e41ffSAmlan Barua 8335b3e41ffSAmlan Barua Level: intermediate 83497504071SAmlan Barua Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 83597504071SAmlan Barua VecScatterFFTWToPetsc removes those extra zeros. 8365b3e41ffSAmlan Barua 8373c3a9c75SAmlan Barua */ 8383c3a9c75SAmlan Barua 8393c3a9c75SAmlan Barua EXTERN_C_BEGIN 8403c3a9c75SAmlan Barua #undef __FUNCT__ 84174a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 84274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8435b3e41ffSAmlan Barua { 8445b3e41ffSAmlan Barua PetscErrorCode ierr; 8455b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8465b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8475b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8489496c9aeSAmlan Barua PetscInt N=fft->N; 849b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 8509496c9aeSAmlan Barua PetscInt low; 8519496c9aeSAmlan Barua PetscInt rank,size; 8525b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8539496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8545b3e41ffSAmlan Barua VecScatter vecscat; 8555b3e41ffSAmlan Barua IS list1,list2; 8569496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8579496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8589496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8599496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8609496c9aeSAmlan Barua ptrdiff_t temp; 8619496c9aeSAmlan Barua #endif 8629496c9aeSAmlan Barua 8635b3e41ffSAmlan Barua 8645b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8655b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 866b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 8675b3e41ffSAmlan Barua 868e81bb599SAmlan Barua if (size==1){ 869b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 8706971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8716971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8726971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8736971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 874b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 875e81bb599SAmlan Barua } 876e81bb599SAmlan Barua else{ 877e81bb599SAmlan Barua 878e81bb599SAmlan Barua switch (ndim){ 879e81bb599SAmlan Barua case 1: 88064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 88164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 8829496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 8839496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 88464657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 88564657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88664657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88764657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 88864657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 88964657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 89064657d84SAmlan Barua #else 8916a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 89264657d84SAmlan Barua #endif 8935b3e41ffSAmlan Barua break; 8945b3e41ffSAmlan Barua case 2: 895bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 896bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 897bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 898bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 899bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 900bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 903bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 904bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 905bd59e6c6SAmlan Barua #else 9065b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9075b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9085b3e41ffSAmlan Barua 9099496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9109496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9115b3e41ffSAmlan Barua 9125b3e41ffSAmlan Barua if (dim[1]%2==0) 9135b3e41ffSAmlan Barua NM = dim[1]+2; 9145b3e41ffSAmlan Barua else 9155b3e41ffSAmlan Barua NM = dim[1]+1; 9165b3e41ffSAmlan Barua 9175b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 9185b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 9195b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9205b3e41ffSAmlan Barua tempindx1 = i*NM + j; 9215b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9225b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9235b3e41ffSAmlan Barua } 9245b3e41ffSAmlan Barua } 9255b3e41ffSAmlan Barua 9265b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9275b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9285b3e41ffSAmlan Barua 9295b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9305b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9315b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9325b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 934b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 935b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 936b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 937bd59e6c6SAmlan Barua #endif 9389496c9aeSAmlan Barua break; 9395b3e41ffSAmlan Barua case 3: 940bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 941bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 942bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 943bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 944bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 945bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 948bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 949bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 950bd59e6c6SAmlan Barua #else 951bd59e6c6SAmlan Barua 95251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 95351d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 95451d1eed7SAmlan Barua 9559496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9569496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9579496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9589496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 95951d1eed7SAmlan Barua 96051d1eed7SAmlan Barua if (dim[2]%2==0) 96151d1eed7SAmlan Barua NM = dim[2]+2; 96251d1eed7SAmlan Barua else 96351d1eed7SAmlan Barua NM = dim[2]+1; 96451d1eed7SAmlan Barua 96551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 96651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 96751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 96851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 96951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 97051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 97151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 97251d1eed7SAmlan Barua } 97351d1eed7SAmlan Barua } 97451d1eed7SAmlan Barua } 97551d1eed7SAmlan Barua 97651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 97751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 97851d1eed7SAmlan Barua 97951d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 98051d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 98151d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 98251d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 984b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 985b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 986b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 987bd59e6c6SAmlan Barua #endif 9889496c9aeSAmlan Barua break; 9895b3e41ffSAmlan Barua default: 990bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 991bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 992bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 993bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 994bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 998bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 999bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1000bd59e6c6SAmlan Barua #else 1001ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1002ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1003ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1004ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1005ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1006ba6e06d0SAmlan Barua 1007ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1008ba6e06d0SAmlan Barua 1009ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1010ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1011ba6e06d0SAmlan Barua 1012ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1013ba6e06d0SAmlan Barua NM = 2; 1014ba6e06d0SAmlan Barua else 1015ba6e06d0SAmlan Barua NM = 1; 1016ba6e06d0SAmlan Barua 1017ba6e06d0SAmlan Barua j = low; 1018ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1019ba6e06d0SAmlan Barua { 1020ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1021ba6e06d0SAmlan Barua indx2[i] = j; 1022ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1023ba6e06d0SAmlan Barua { j+=NM;} 1024ba6e06d0SAmlan Barua j++; 1025ba6e06d0SAmlan Barua } 1026ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1027ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1028ba6e06d0SAmlan Barua 1029ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1030ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1033b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1034b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1035b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1036b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1037bd59e6c6SAmlan Barua #endif 10389496c9aeSAmlan Barua break; 10395b3e41ffSAmlan Barua } 1040e81bb599SAmlan Barua } 10415b3e41ffSAmlan Barua return 0; 10425b3e41ffSAmlan Barua } 10435b3e41ffSAmlan Barua EXTERN_C_END 10445b3e41ffSAmlan Barua 10455b3e41ffSAmlan Barua EXTERN_C_BEGIN 10465b3e41ffSAmlan Barua #undef __FUNCT__ 10473c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10483c3a9c75SAmlan Barua /* 10493c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 10503c3a9c75SAmlan Barua via the external package FFTW 10513c3a9c75SAmlan Barua Options Database Keys: 10523c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10533c3a9c75SAmlan Barua 10543c3a9c75SAmlan Barua Level: intermediate 10553c3a9c75SAmlan Barua 10563c3a9c75SAmlan Barua */ 10573c3a9c75SAmlan Barua 1058b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1059b2b77a04SHong Zhang { 1060b2b77a04SHong Zhang PetscErrorCode ierr; 1061b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1062b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1063b2b77a04SHong Zhang Mat_FFTW *fftw; 1064b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1065b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1066b2b77a04SHong Zhang PetscBool flg; 10674f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 106811902ff2SHong Zhang PetscMPIInt size,rank; 10699496c9aeSAmlan Barua ptrdiff_t *pdim; 10709496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10719496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10729496c9aeSAmlan Barua ptrdiff_t temp; 10738ca4c034SAmlan Barua PetscInt N1,tot_dim; 10744f48bc67SAmlan Barua #else 10754f48bc67SAmlan Barua PetscInt n1; 10769496c9aeSAmlan Barua #endif 10779496c9aeSAmlan Barua 1078b2b77a04SHong Zhang PetscFunctionBegin; 1079b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 108011902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1081c92418ccSAmlan Barua 10821878d478SAmlan Barua fftw_mpi_init(); 108311902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 108411902ff2SHong Zhang pdim[0] = dim[0]; 10858ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10868ca4c034SAmlan Barua tot_dim = 2*dim[0]; 10878ca4c034SAmlan Barua #endif 108811902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 108911902ff2SHong Zhang { 10906882372aSHong Zhang partial_dim *= dim[ctr]; 109111902ff2SHong Zhang pdim[ctr] = dim[ctr]; 10928ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10938ca4c034SAmlan Barua if (ctr==ndim-1) 10948ca4c034SAmlan Barua tot_dim *= (dim[ctr]/2+1); 10958ca4c034SAmlan Barua else 10968ca4c034SAmlan Barua tot_dim *= dim[ctr]; 10978ca4c034SAmlan Barua #endif 10986882372aSHong Zhang } 10993c3a9c75SAmlan Barua 1100b2b77a04SHong Zhang if (size == 1) { 1101e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1102b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1103b2b77a04SHong Zhang n = N; 1104e81bb599SAmlan Barua #else 1105e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1106e81bb599SAmlan Barua n = tot_dim; 1107e81bb599SAmlan Barua #endif 1108e81bb599SAmlan Barua 1109b2b77a04SHong Zhang } else { 11101abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1111b2b77a04SHong Zhang switch (ndim){ 1112b2b77a04SHong Zhang case 1: 11133c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11143c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1115e5d7f247SAmlan Barua #else 11169496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 11176882372aSHong Zhang n = (PetscInt)local_n0; 11189496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 11199496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1120e5d7f247SAmlan Barua #endif 1121b2b77a04SHong Zhang break; 1122b2b77a04SHong Zhang case 2: 11235b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1124b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1125b2b77a04SHong Zhang /* 1126b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1127b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1128b2b77a04SHong Zhang */ 1129b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1130b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11315b3e41ffSAmlan Barua #else 11325b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 11335b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1134c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11355b3e41ffSAmlan Barua #endif 1136b2b77a04SHong Zhang break; 1137b2b77a04SHong Zhang case 3: 113851d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 113951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 11406882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11416882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 114251d1eed7SAmlan Barua #else 114351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 114451d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1145c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 114651d1eed7SAmlan Barua #endif 1147b2b77a04SHong Zhang break; 1148b2b77a04SHong Zhang default: 1149b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 115011902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 11516882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11526882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1153b3a17365SAmlan Barua #else 1154b3a17365SAmlan Barua temp = pdim[ndim-1]; 1155b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1156b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1157b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1158b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1159b3a17365SAmlan Barua pdim[ndim-1] = temp; 1160c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1161b3a17365SAmlan Barua #endif 1162b2b77a04SHong Zhang break; 1163b2b77a04SHong Zhang } 1164b2b77a04SHong Zhang } 1165b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1166b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1167b2b77a04SHong Zhang fft->data = (void*)fftw; 1168b2b77a04SHong Zhang 1169b2b77a04SHong Zhang fft->n = n; 1170c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1171e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1172c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1173b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1174c92418ccSAmlan Barua 1175b2b77a04SHong Zhang fftw->p_forward = 0; 1176b2b77a04SHong Zhang fftw->p_backward = 0; 1177b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1178b2b77a04SHong Zhang 1179b2b77a04SHong Zhang if (size == 1){ 1180b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1181b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1182b2b77a04SHong Zhang } else { 1183b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1184b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1185b2b77a04SHong Zhang } 1186b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1187b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 11884be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 118974a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 119074a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1191b2b77a04SHong Zhang 1192b2b77a04SHong Zhang /* get runtime options */ 1193b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1194b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1195b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1196b2b77a04SHong Zhang PetscOptionsEnd(); 1197b2b77a04SHong Zhang PetscFunctionReturn(0); 1198b2b77a04SHong Zhang } 1199b2b77a04SHong Zhang EXTERN_C_END 12003c3a9c75SAmlan Barua 12013c3a9c75SAmlan Barua 12023c3a9c75SAmlan Barua 12033c3a9c75SAmlan Barua 1204