1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua Input parameter: 303564f093SHong Zhang A - the matrix 3197504071SAmlan Barua x - the vector on which FDFT will be performed 3297504071SAmlan Barua 3397504071SAmlan Barua Output parameter: 3497504071SAmlan Barua y - vector that stores result of FDFT 3597504071SAmlan Barua */ 36b2b77a04SHong Zhang #undef __FUNCT__ 37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39b2b77a04SHong Zhang { 40b2b77a04SHong Zhang PetscErrorCode ierr; 41b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 42b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 44b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 45b2b77a04SHong Zhang 46b2b77a04SHong Zhang PetscFunctionBegin; 47b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 50b2b77a04SHong Zhang switch (ndim) { 51b2b77a04SHong Zhang case 1: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 2: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang case 3: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang default: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang } 80b2b77a04SHong Zhang fftw->finarray = x_array; 81b2b77a04SHong Zhang fftw->foutarray = y_array; 82b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 85b2b77a04SHong Zhang } else { /* use existing plan */ 86b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88b2b77a04SHong Zhang } else { 89b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 90b2b77a04SHong Zhang } 91b2b77a04SHong Zhang } 92b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang PetscFunctionReturn(0); 95b2b77a04SHong Zhang } 96b2b77a04SHong Zhang 9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 9897504071SAmlan Barua Input parameter: 993564f093SHong Zhang A - the matrix 10097504071SAmlan Barua x - the vector on which BDFT will be performed 10197504071SAmlan Barua 10297504071SAmlan Barua Output parameter: 10397504071SAmlan Barua y - vector that stores result of BDFT 10497504071SAmlan Barua */ 10597504071SAmlan Barua 106b2b77a04SHong Zhang #undef __FUNCT__ 107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109b2b77a04SHong Zhang { 110b2b77a04SHong Zhang PetscErrorCode ierr; 111b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 112b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 114b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 115b2b77a04SHong Zhang 116b2b77a04SHong Zhang PetscFunctionBegin; 117b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 120b2b77a04SHong Zhang switch (ndim) { 121b2b77a04SHong Zhang case 1: 12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 123b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12458a912c5SAmlan Barua #else 125e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 12658a912c5SAmlan Barua #endif 127b2b77a04SHong Zhang break; 128b2b77a04SHong Zhang case 2: 12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 130b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13158a912c5SAmlan Barua #else 132e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 13358a912c5SAmlan Barua #endif 134b2b77a04SHong Zhang break; 135b2b77a04SHong Zhang case 3: 13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 137b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13858a912c5SAmlan Barua #else 139e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14058a912c5SAmlan Barua #endif 141b2b77a04SHong Zhang break; 142b2b77a04SHong Zhang default: 14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 144b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14558a912c5SAmlan Barua #else 146e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14758a912c5SAmlan Barua #endif 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang } 150b2b77a04SHong Zhang fftw->binarray = x_array; 151b2b77a04SHong Zhang fftw->boutarray = y_array; 152b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153b2b77a04SHong Zhang } else { /* use existing plan */ 154b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156b2b77a04SHong Zhang } else { 157b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158b2b77a04SHong Zhang } 159b2b77a04SHong Zhang } 160b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang PetscFunctionReturn(0); 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang 16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 16697504071SAmlan Barua Input parameter: 1673564f093SHong Zhang A - the matrix 16897504071SAmlan Barua x - the vector on which FDFT will be performed 16997504071SAmlan Barua 17097504071SAmlan Barua Output parameter: 17197504071SAmlan Barua y - vector that stores result of FDFT 17297504071SAmlan Barua */ 173b2b77a04SHong Zhang #undef __FUNCT__ 174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176b2b77a04SHong Zhang { 177b2b77a04SHong Zhang PetscErrorCode ierr; 178b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 179b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 181c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 182*ce94432eSBarry Smith MPI_Comm comm; 183b2b77a04SHong Zhang 184b2b77a04SHong Zhang PetscFunctionBegin; 185*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 186b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 187b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 188b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 189b2b77a04SHong Zhang switch (ndim) { 190b2b77a04SHong Zhang case 1: 1913c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 193ae0a50aaSHong Zhang #else 1944f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 1953c3a9c75SAmlan Barua #endif 196b2b77a04SHong Zhang break; 197b2b77a04SHong Zhang case 2: 19897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 199b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2003c3a9c75SAmlan Barua #else 2013c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2023c3a9c75SAmlan Barua #endif 203b2b77a04SHong Zhang break; 204b2b77a04SHong Zhang case 3: 2053c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 206b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2073c3a9c75SAmlan Barua #else 20851d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2093c3a9c75SAmlan Barua #endif 210b2b77a04SHong Zhang break; 211b2b77a04SHong Zhang default: 2123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 213c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2143c3a9c75SAmlan Barua #else 2153c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2163c3a9c75SAmlan Barua #endif 217b2b77a04SHong Zhang break; 218b2b77a04SHong Zhang } 219b2b77a04SHong Zhang fftw->finarray = x_array; 220b2b77a04SHong Zhang fftw->foutarray = y_array; 221b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 222b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 223b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 224b2b77a04SHong Zhang } else { /* use existing plan */ 225b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 226b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 227b2b77a04SHong Zhang } else { 228b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 229b2b77a04SHong Zhang } 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 232b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 233b2b77a04SHong Zhang PetscFunctionReturn(0); 234b2b77a04SHong Zhang } 235b2b77a04SHong Zhang 23697504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 23797504071SAmlan Barua Input parameter: 2383564f093SHong Zhang A - the matrix 23997504071SAmlan Barua x - the vector on which BDFT will be performed 24097504071SAmlan Barua 24197504071SAmlan Barua Output parameter: 24297504071SAmlan Barua y - vector that stores result of BDFT 24397504071SAmlan Barua */ 244b2b77a04SHong Zhang #undef __FUNCT__ 245b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 246b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 247b2b77a04SHong Zhang { 248b2b77a04SHong Zhang PetscErrorCode ierr; 249b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 250b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 251b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 252c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 253*ce94432eSBarry Smith MPI_Comm comm; 254b2b77a04SHong Zhang 255b2b77a04SHong Zhang PetscFunctionBegin; 256*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 257b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 258b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 259b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 260b2b77a04SHong Zhang switch (ndim) { 261b2b77a04SHong Zhang case 1: 2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 263b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 264ae0a50aaSHong Zhang #else 2654f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2663c3a9c75SAmlan Barua #endif 267b2b77a04SHong Zhang break; 268b2b77a04SHong Zhang case 2: 26997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 270b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2713c3a9c75SAmlan Barua #else 2723c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang case 3: 2763c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 277b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2783c3a9c75SAmlan Barua #else 2793c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2803c3a9c75SAmlan Barua #endif 281b2b77a04SHong Zhang break; 282b2b77a04SHong Zhang default: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2853c3a9c75SAmlan Barua #else 2863c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang } 290b2b77a04SHong Zhang fftw->binarray = x_array; 291b2b77a04SHong Zhang fftw->boutarray = y_array; 292b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 293b2b77a04SHong Zhang } else { /* use existing plan */ 294b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 295b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 296b2b77a04SHong Zhang } else { 297b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 298b2b77a04SHong Zhang } 299b2b77a04SHong Zhang } 300b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 301b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 302b2b77a04SHong Zhang PetscFunctionReturn(0); 303b2b77a04SHong Zhang } 304b2b77a04SHong Zhang 305b2b77a04SHong Zhang #undef __FUNCT__ 306b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 307b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 308b2b77a04SHong Zhang { 309b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 310b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 311b2b77a04SHong Zhang PetscErrorCode ierr; 312b2b77a04SHong Zhang 313b2b77a04SHong Zhang PetscFunctionBegin; 314b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 315b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 316b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 317bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3186ccf0b3eSHong Zhang fftw_mpi_cleanup(); 319b2b77a04SHong Zhang PetscFunctionReturn(0); 320b2b77a04SHong Zhang } 321b2b77a04SHong Zhang 322c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 323b2b77a04SHong Zhang #undef __FUNCT__ 324b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 325b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 326b2b77a04SHong Zhang { 327b2b77a04SHong Zhang PetscErrorCode ierr; 328b2b77a04SHong Zhang PetscScalar *array; 329b2b77a04SHong Zhang 330b2b77a04SHong Zhang PetscFunctionBegin; 331b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 332b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 333b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 334b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 335b2b77a04SHong Zhang PetscFunctionReturn(0); 336b2b77a04SHong Zhang } 337b2b77a04SHong Zhang 3384f7415efSAmlan Barua #undef __FUNCT__ 3394be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3404be45526SHong Zhang /*@ 341b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3424f7415efSAmlan Barua parallel layout determined by FFTW 3434f7415efSAmlan Barua 3444f7415efSAmlan Barua Collective on Mat 3454f7415efSAmlan Barua 3464f7415efSAmlan Barua Input Parameter: 3473564f093SHong Zhang . A - the matrix 3484f7415efSAmlan Barua 3494f7415efSAmlan Barua Output Parameter: 350cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 351cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 352cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3534f7415efSAmlan Barua 3544f7415efSAmlan Barua Level: advanced 3553564f093SHong Zhang 35697504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 35797504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 35897504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 35997504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 36097504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 36197504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 362e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 363e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 364e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 365e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 366e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 367e0ec536eSAmlan Barua each processor and returns that. 3684f7415efSAmlan Barua 3694f7415efSAmlan Barua .seealso: MatCreateFFTW() 3704be45526SHong Zhang @*/ 3714be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3724be45526SHong Zhang { 3734be45526SHong Zhang PetscErrorCode ierr; 374b4c3921fSHong Zhang 3754be45526SHong Zhang PetscFunctionBegin; 3764be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3774be45526SHong Zhang PetscFunctionReturn(0); 3784be45526SHong Zhang } 3794be45526SHong Zhang 3804f7415efSAmlan Barua EXTERN_C_BEGIN 3814be45526SHong Zhang #undef __FUNCT__ 3824be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3834be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3844f7415efSAmlan Barua { 3854f7415efSAmlan Barua PetscErrorCode ierr; 3864f7415efSAmlan Barua PetscMPIInt size,rank; 387*ce94432eSBarry Smith MPI_Comm comm; 3884f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3894f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3909496c9aeSAmlan Barua PetscInt N = fft->N; 3914f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 3924f7415efSAmlan Barua 3934f7415efSAmlan Barua PetscFunctionBegin; 3944f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3954f7415efSAmlan Barua PetscValidType(A,1); 396*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 3974f7415efSAmlan Barua 3984f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 3994f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4004f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4024f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4034f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4044f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4054f7415efSAmlan Barua #else 4068ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4078ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4088ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4094f7415efSAmlan Barua #endif 41097504071SAmlan Barua } else { /* parallel cases */ 4114f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4129496c9aeSAmlan Barua ptrdiff_t local_n1; 4139496c9aeSAmlan Barua fftw_complex *data_fout; 4149496c9aeSAmlan Barua ptrdiff_t local_1_start; 4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4169496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4179496c9aeSAmlan Barua #else 4184f7415efSAmlan Barua double *data_finr,*data_boutr; 4196ccf0b3eSHong Zhang PetscInt n1,N1; 4209496c9aeSAmlan Barua ptrdiff_t temp; 4219496c9aeSAmlan Barua #endif 4229496c9aeSAmlan Barua 4234f7415efSAmlan Barua switch (ndim) { 4244f7415efSAmlan Barua case 1: 42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4264f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42757625b84SAmlan Barua #else 42857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42957625b84SAmlan Barua if (fin) { 43057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43226fbe8dcSKarl Rupp 43357625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43457625b84SAmlan Barua } 43557625b84SAmlan Barua if (fout) { 43657625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 437778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43826fbe8dcSKarl Rupp 43957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 44057625b84SAmlan Barua } 44157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44257625b84SAmlan Barua if (bout) { 44357625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 444778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44526fbe8dcSKarl Rupp 44657625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44757625b84SAmlan Barua } 44857625b84SAmlan Barua break; 44957625b84SAmlan Barua #endif 4504f7415efSAmlan Barua case 2: 45197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4524f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4534f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4544f7415efSAmlan Barua if (fin) { 4554f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 456778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 45726fbe8dcSKarl Rupp 4584f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4594f7415efSAmlan Barua } 4604f7415efSAmlan Barua if (bout) { 4614f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 462778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 46326fbe8dcSKarl Rupp 4644f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4654f7415efSAmlan Barua } 46657625b84SAmlan Barua if (fout) { 46757625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46926fbe8dcSKarl Rupp 47057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 47157625b84SAmlan Barua } 4724f7415efSAmlan Barua #else 4734f7415efSAmlan Barua /* Get local size */ 4744f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4754f7415efSAmlan Barua if (fin) { 4764f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 477778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 47826fbe8dcSKarl Rupp 4794f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua if (fout) { 4824f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48426fbe8dcSKarl Rupp 4854f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4864f7415efSAmlan Barua } 4874f7415efSAmlan Barua if (bout) { 4884f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 49026fbe8dcSKarl Rupp 4914f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4924f7415efSAmlan Barua } 4934f7415efSAmlan Barua #endif 4944f7415efSAmlan Barua break; 4954f7415efSAmlan Barua case 3: 4964f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4974f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4984f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4994f7415efSAmlan Barua if (fin) { 5004f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 501778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50226fbe8dcSKarl Rupp 5034f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5044f7415efSAmlan Barua } 5054f7415efSAmlan Barua if (bout) { 5064f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 507778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 50826fbe8dcSKarl Rupp 5094f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5104f7415efSAmlan Barua } 5113564f093SHong Zhang 51257625b84SAmlan Barua if (fout) { 51357625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 514778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51526fbe8dcSKarl Rupp 51657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51757625b84SAmlan Barua } 5184f7415efSAmlan Barua #else 5190c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5200c9b18e4SAmlan Barua if (fin) { 5210c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52326fbe8dcSKarl Rupp 5240c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5250c9b18e4SAmlan Barua } 5260c9b18e4SAmlan Barua if (fout) { 5270c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52926fbe8dcSKarl Rupp 5300c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5310c9b18e4SAmlan Barua } 5320c9b18e4SAmlan Barua if (bout) { 5330c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53526fbe8dcSKarl Rupp 5360c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5370c9b18e4SAmlan Barua } 5384f7415efSAmlan Barua #endif 5394f7415efSAmlan Barua break; 5404f7415efSAmlan Barua default: 5414f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5424f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 54326fbe8dcSKarl Rupp 5444f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 54526fbe8dcSKarl Rupp 5464f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5474f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 54826fbe8dcSKarl Rupp 5494f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5504f7415efSAmlan Barua 5514f7415efSAmlan Barua if (fin) { 5524f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 553778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 55426fbe8dcSKarl Rupp 5554f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5564f7415efSAmlan Barua } 5574f7415efSAmlan Barua if (bout) { 5584f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 559778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 56026fbe8dcSKarl Rupp 5619496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5624f7415efSAmlan Barua } 5633564f093SHong Zhang 56457625b84SAmlan Barua if (fout) { 56557625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56726fbe8dcSKarl Rupp 56857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56957625b84SAmlan Barua } 5704f7415efSAmlan Barua #else 5710c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5720c9b18e4SAmlan Barua if (fin) { 5730c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 57526fbe8dcSKarl Rupp 5760c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5770c9b18e4SAmlan Barua } 5780c9b18e4SAmlan Barua if (fout) { 5790c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58126fbe8dcSKarl Rupp 5820c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5830c9b18e4SAmlan Barua } 5840c9b18e4SAmlan Barua if (bout) { 5850c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 58726fbe8dcSKarl Rupp 5880c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5890c9b18e4SAmlan Barua } 5904f7415efSAmlan Barua #endif 5914f7415efSAmlan Barua break; 5924f7415efSAmlan Barua } 5934f7415efSAmlan Barua } 5944f7415efSAmlan Barua PetscFunctionReturn(0); 5954f7415efSAmlan Barua } 5964f7415efSAmlan Barua EXTERN_C_END 5974f7415efSAmlan Barua 598c92418ccSAmlan Barua #undef __FUNCT__ 59974a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6003564f093SHong Zhang /*@ 6013564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 60254efbe56SHong Zhang 6033564f093SHong Zhang Collective on Mat 6043564f093SHong Zhang 6053564f093SHong Zhang Input Parameters: 6063564f093SHong Zhang + A - FFTW matrix 6073564f093SHong Zhang - x - the PETSc vector 6083564f093SHong Zhang 6093564f093SHong Zhang Output Parameters: 6103564f093SHong Zhang . y - the FFTW vector 6113564f093SHong Zhang 612b2b77a04SHong Zhang Options Database Keys: 6133564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 614b2b77a04SHong Zhang 615b2b77a04SHong Zhang Level: intermediate 6163564f093SHong Zhang 61797504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 61897504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 61997504071SAmlan Barua zeros. This routine does that job by scattering operation. 62097504071SAmlan Barua 6213564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6223564f093SHong Zhang @*/ 6233564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6243564f093SHong Zhang { 6253564f093SHong Zhang PetscErrorCode ierr; 626b2b77a04SHong Zhang 6273564f093SHong Zhang PetscFunctionBegin; 6283564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6293564f093SHong Zhang PetscFunctionReturn(0); 6303564f093SHong Zhang } 6313c3a9c75SAmlan Barua 6323c3a9c75SAmlan Barua EXTERN_C_BEGIN 6333c3a9c75SAmlan Barua #undef __FUNCT__ 6341986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 63574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6363c3a9c75SAmlan Barua { 6373c3a9c75SAmlan Barua PetscErrorCode ierr; 638*ce94432eSBarry Smith MPI_Comm comm; 6393c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6403c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6419496c9aeSAmlan Barua PetscInt N =fft->N; 642b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6439496c9aeSAmlan Barua PetscInt low; 6448ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6453c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6469496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6473c3a9c75SAmlan Barua VecScatter vecscat; 6483c3a9c75SAmlan Barua IS list1,list2; 6499496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6509496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6519496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6529496c9aeSAmlan Barua PetscInt N1, n1,NM; 6539496c9aeSAmlan Barua ptrdiff_t temp; 6549496c9aeSAmlan Barua #endif 6553c3a9c75SAmlan Barua 6563564f093SHong Zhang PetscFunctionBegin; 657*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 658f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 659f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6600298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 6613c3a9c75SAmlan Barua 6623564f093SHong Zhang if (size==1) { 6638ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6648ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6659496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6666971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6676971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6686971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6696971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 670b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6713564f093SHong Zhang } else { 6723c3a9c75SAmlan Barua switch (ndim) { 6733c3a9c75SAmlan Barua case 1: 67464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67564657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 67626fbe8dcSKarl Rupp 6774f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6784f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 67964657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 68064657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 68164657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 68264657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 68364657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 68464657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 68564657d84SAmlan Barua #else 686e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 68764657d84SAmlan Barua #endif 6883c3a9c75SAmlan Barua break; 6893c3a9c75SAmlan Barua case 2: 690bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 691bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 69226fbe8dcSKarl Rupp 6934f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 6944f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 695bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 696bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 698bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 699bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 700bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 701bd59e6c6SAmlan Barua #else 7023c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 70326fbe8dcSKarl Rupp 7043c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7059496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7069496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7073c3a9c75SAmlan Barua 7083564f093SHong Zhang if (dim[1]%2==0) { 7093c3a9c75SAmlan Barua NM = dim[1]+2; 7103564f093SHong Zhang } else { 7113c3a9c75SAmlan Barua NM = dim[1]+1; 7123564f093SHong Zhang } 7133c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7143c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7153c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7163c3a9c75SAmlan Barua tempindx1 = i*NM + j; 71726fbe8dcSKarl Rupp 7185b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7193c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7203c3a9c75SAmlan Barua } 7213c3a9c75SAmlan Barua } 7223c3a9c75SAmlan Barua 7233c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7243c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7253c3a9c75SAmlan Barua 726f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 727f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 730b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 731b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 732b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 733b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 734bd59e6c6SAmlan Barua #endif 7359496c9aeSAmlan Barua break; 7363c3a9c75SAmlan Barua 7373c3a9c75SAmlan Barua case 3: 738bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 739bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 74026fbe8dcSKarl Rupp 7414f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7424f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 743bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 744bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 745bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 746bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 747bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 748bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 749bd59e6c6SAmlan Barua #else 75051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 75126fbe8dcSKarl Rupp 75251d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 75351d1eed7SAmlan Barua 7549496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7559496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 75651d1eed7SAmlan Barua 757db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 758db4deed7SKarl Rupp else NM = dim[2]+1; 75951d1eed7SAmlan Barua 76051d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 76151d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 76251d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 76351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 76451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 76526fbe8dcSKarl Rupp 76651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 76751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 76851d1eed7SAmlan Barua } 76951d1eed7SAmlan Barua } 77051d1eed7SAmlan Barua } 77151d1eed7SAmlan Barua 77251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 77351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 77451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 77551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 778b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 779b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 780b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 781b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 782bd59e6c6SAmlan Barua #endif 7839496c9aeSAmlan Barua break; 7843c3a9c75SAmlan Barua 7853c3a9c75SAmlan Barua default: 786bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 787bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78826fbe8dcSKarl Rupp 7894f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 7904f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 791bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 792bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 793bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 795bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 796bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 797bd59e6c6SAmlan Barua #else 798e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 79926fbe8dcSKarl Rupp 800e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 80126fbe8dcSKarl Rupp 802e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80326fbe8dcSKarl Rupp 804e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 80526fbe8dcSKarl Rupp 806e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 807e5d7f247SAmlan Barua 808e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 809e5d7f247SAmlan Barua 810e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 811e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 812e5d7f247SAmlan Barua 813db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 814db4deed7SKarl Rupp else NM = 1; 815e5d7f247SAmlan Barua 8166971673cSAmlan Barua j = low; 8173564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8186971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8196971673cSAmlan Barua indx2[i] = j; 82026fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8216971673cSAmlan Barua j++; 8226971673cSAmlan Barua } 8236971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8246971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8256971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8266971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8276971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8286971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 829b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 830b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 831b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 832b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 833bd59e6c6SAmlan Barua #endif 8349496c9aeSAmlan Barua break; 8353c3a9c75SAmlan Barua } 836e81bb599SAmlan Barua } 8373564f093SHong Zhang PetscFunctionReturn(0); 8383c3a9c75SAmlan Barua } 8393c3a9c75SAmlan Barua EXTERN_C_END 8403c3a9c75SAmlan Barua 8413c3a9c75SAmlan Barua #undef __FUNCT__ 84274a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8433564f093SHong Zhang /*@ 8443564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8453564f093SHong Zhang 8463564f093SHong Zhang Collective on Mat 8473564f093SHong Zhang 8483564f093SHong Zhang Input Parameters: 8493564f093SHong Zhang + A - FFTW matrix 8503564f093SHong Zhang - x - FFTW vector 8513564f093SHong Zhang 8523564f093SHong Zhang Output Parameters: 8533564f093SHong Zhang . y - PETSc vector 8543564f093SHong Zhang 8553564f093SHong Zhang Level: intermediate 8563564f093SHong Zhang 8573564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8583564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8593564f093SHong Zhang 8603564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8613564f093SHong Zhang @*/ 86274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8633c3a9c75SAmlan Barua { 8643c3a9c75SAmlan Barua PetscErrorCode ierr; 8655fd66863SKarl Rupp 8663c3a9c75SAmlan Barua PetscFunctionBegin; 86774a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8683c3a9c75SAmlan Barua PetscFunctionReturn(0); 8693c3a9c75SAmlan Barua } 87054efbe56SHong Zhang 8713c3a9c75SAmlan Barua EXTERN_C_BEGIN 8723c3a9c75SAmlan Barua #undef __FUNCT__ 87374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 87474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8755b3e41ffSAmlan Barua { 8765b3e41ffSAmlan Barua PetscErrorCode ierr; 877*ce94432eSBarry Smith MPI_Comm comm; 8785b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8795b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8809496c9aeSAmlan Barua PetscInt N = fft->N; 881b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 8829496c9aeSAmlan Barua PetscInt low; 8839496c9aeSAmlan Barua PetscInt rank,size; 8845b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8859496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8865b3e41ffSAmlan Barua VecScatter vecscat; 8875b3e41ffSAmlan Barua IS list1,list2; 8889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8899496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8909496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8919496c9aeSAmlan Barua PetscInt N1, n1,NM; 8929496c9aeSAmlan Barua ptrdiff_t temp; 8939496c9aeSAmlan Barua #endif 8949496c9aeSAmlan Barua 8953564f093SHong Zhang PetscFunctionBegin; 896*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 8975b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8985b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8990298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9005b3e41ffSAmlan Barua 901e81bb599SAmlan Barua if (size==1) { 902b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9036971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9046971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9056971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9066971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 907b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 908e81bb599SAmlan Barua 9093564f093SHong Zhang } else { 910e81bb599SAmlan Barua switch (ndim) { 911e81bb599SAmlan Barua case 1: 91264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 91364657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 91426fbe8dcSKarl Rupp 9154f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9164f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 91764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 91864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 91964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 92064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 92164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 92264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 92364657d84SAmlan Barua #else 9246a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 92564657d84SAmlan Barua #endif 9265b3e41ffSAmlan Barua break; 9275b3e41ffSAmlan Barua case 2: 928bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 929bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 93026fbe8dcSKarl Rupp 9314f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9324f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 933bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 934bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 937bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 938bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 939bd59e6c6SAmlan Barua #else 9405b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 94126fbe8dcSKarl Rupp 9425b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9435b3e41ffSAmlan Barua 9449496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9459496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9465b3e41ffSAmlan Barua 947db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 948db4deed7SKarl Rupp else NM = dim[1]+1; 9495b3e41ffSAmlan Barua 9505b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9515b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9525b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9535b3e41ffSAmlan Barua tempindx1 = i*NM + j; 95426fbe8dcSKarl Rupp 9555b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9565b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9575b3e41ffSAmlan Barua } 9585b3e41ffSAmlan Barua } 9595b3e41ffSAmlan Barua 9605b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9615b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9625b3e41ffSAmlan Barua 9635b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9645b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9655b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9665b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 967b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 968b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 969b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 970b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 971bd59e6c6SAmlan Barua #endif 9729496c9aeSAmlan Barua break; 9735b3e41ffSAmlan Barua case 3: 974bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 975bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 97626fbe8dcSKarl Rupp 9774f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9784f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 979bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 980bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 984bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 985bd59e6c6SAmlan Barua #else 986bd59e6c6SAmlan Barua 98751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98826fbe8dcSKarl Rupp 98951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 99051d1eed7SAmlan Barua 9919496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9929496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 99351d1eed7SAmlan Barua 994db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 995db4deed7SKarl Rupp else NM = dim[2]+1; 99651d1eed7SAmlan Barua 99751d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 99851d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 99951d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 100051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 100151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 100226fbe8dcSKarl Rupp 100351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 100451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 100551d1eed7SAmlan Barua } 100651d1eed7SAmlan Barua } 100751d1eed7SAmlan Barua } 100851d1eed7SAmlan Barua 100951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 101051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 101151d1eed7SAmlan Barua 101251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 101351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1016b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1017b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1018b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1019b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1020bd59e6c6SAmlan Barua #endif 10219496c9aeSAmlan Barua break; 10225b3e41ffSAmlan Barua default: 1023bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1024bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 102526fbe8dcSKarl Rupp 10264f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10274f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1028bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1032bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1033bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1034bd59e6c6SAmlan Barua #else 1035ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 103626fbe8dcSKarl Rupp 1037ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 103826fbe8dcSKarl Rupp 1039ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 104026fbe8dcSKarl Rupp 1041ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 104226fbe8dcSKarl Rupp 1043ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1044ba6e06d0SAmlan Barua 1045ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1046ba6e06d0SAmlan Barua 1047ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1048ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1049ba6e06d0SAmlan Barua 1050db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1051db4deed7SKarl Rupp else NM = 1; 1052ba6e06d0SAmlan Barua 1053ba6e06d0SAmlan Barua j = low; 10543564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1055ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1056ba6e06d0SAmlan Barua indx2[i] = j; 10573564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1058ba6e06d0SAmlan Barua j++; 1059ba6e06d0SAmlan Barua } 1060ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1061ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1062ba6e06d0SAmlan Barua 1063ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1064ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1066ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1067b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1068b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1069b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1070b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1071bd59e6c6SAmlan Barua #endif 10729496c9aeSAmlan Barua break; 10735b3e41ffSAmlan Barua } 1074e81bb599SAmlan Barua } 10753564f093SHong Zhang PetscFunctionReturn(0); 10765b3e41ffSAmlan Barua } 10775b3e41ffSAmlan Barua EXTERN_C_END 10785b3e41ffSAmlan Barua 10795b3e41ffSAmlan Barua EXTERN_C_BEGIN 10805b3e41ffSAmlan Barua #undef __FUNCT__ 10813c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10823c3a9c75SAmlan Barua /* 10833564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10843564f093SHong Zhang 10853c3a9c75SAmlan Barua Options Database Keys: 10863c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10873c3a9c75SAmlan Barua 10883c3a9c75SAmlan Barua Level: intermediate 10893c3a9c75SAmlan Barua 10903c3a9c75SAmlan Barua */ 1091b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1092b2b77a04SHong Zhang { 1093b2b77a04SHong Zhang PetscErrorCode ierr; 1094*ce94432eSBarry Smith MPI_Comm comm; 1095b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1096b2b77a04SHong Zhang Mat_FFTW *fftw; 1097b2b77a04SHong Zhang PetscInt n =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1098b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1099b2b77a04SHong Zhang PetscBool flg; 11004f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 110111902ff2SHong Zhang PetscMPIInt size,rank; 11029496c9aeSAmlan Barua ptrdiff_t *pdim; 11039496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11049496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11059496c9aeSAmlan Barua ptrdiff_t temp; 11068ca4c034SAmlan Barua PetscInt N1,tot_dim; 11074f48bc67SAmlan Barua #else 11084f48bc67SAmlan Barua PetscInt n1; 11099496c9aeSAmlan Barua #endif 11109496c9aeSAmlan Barua 1111b2b77a04SHong Zhang PetscFunctionBegin; 1112*ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1113b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 111411902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1115c92418ccSAmlan Barua 11161878d478SAmlan Barua fftw_mpi_init(); 111711902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 111811902ff2SHong Zhang pdim[0] = dim[0]; 11198ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11208ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11218ca4c034SAmlan Barua #endif 11223564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11236882372aSHong Zhang partial_dim *= dim[ctr]; 112411902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11258ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1126db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1127db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11288ca4c034SAmlan Barua #endif 11296882372aSHong Zhang } 11303c3a9c75SAmlan Barua 1131b2b77a04SHong Zhang if (size == 1) { 1132e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1133b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1134b2b77a04SHong Zhang n = N; 1135e81bb599SAmlan Barua #else 1136e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1137e81bb599SAmlan Barua n = tot_dim; 1138e81bb599SAmlan Barua #endif 1139e81bb599SAmlan Barua 1140b2b77a04SHong Zhang } else { 11411abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1142b2b77a04SHong Zhang switch (ndim) { 1143b2b77a04SHong Zhang case 1: 11443c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11453c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1146e5d7f247SAmlan Barua #else 11479496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 114826fbe8dcSKarl Rupp 11496882372aSHong Zhang n = (PetscInt)local_n0; 11509496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11519496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1152e5d7f247SAmlan Barua #endif 1153b2b77a04SHong Zhang break; 1154b2b77a04SHong Zhang case 2: 11555b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1156b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1157b2b77a04SHong Zhang /* 1158b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1159b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1160b2b77a04SHong Zhang */ 1161b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1162b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11635b3e41ffSAmlan Barua #else 11644f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 116526fbe8dcSKarl Rupp 11665b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1167c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11685b3e41ffSAmlan Barua #endif 1169b2b77a04SHong Zhang break; 1170b2b77a04SHong Zhang case 3: 117151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 117251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 117326fbe8dcSKarl Rupp 11746882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11756882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 117651d1eed7SAmlan Barua #else 11774f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 117826fbe8dcSKarl Rupp 117951d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1180c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 118151d1eed7SAmlan Barua #endif 1182b2b77a04SHong Zhang break; 1183b2b77a04SHong Zhang default: 1184b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 118511902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 118626fbe8dcSKarl Rupp 11876882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11886882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1189b3a17365SAmlan Barua #else 1190b3a17365SAmlan Barua temp = pdim[ndim-1]; 119126fbe8dcSKarl Rupp 1192b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 119326fbe8dcSKarl Rupp 11944f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 119526fbe8dcSKarl Rupp 1196b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1197b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 119826fbe8dcSKarl Rupp 1199b3a17365SAmlan Barua pdim[ndim-1] = temp; 120026fbe8dcSKarl Rupp 1201c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1202b3a17365SAmlan Barua #endif 1203b2b77a04SHong Zhang break; 1204b2b77a04SHong Zhang } 1205b2b77a04SHong Zhang } 1206b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1207b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1208b2b77a04SHong Zhang fft->data = (void*)fftw; 1209b2b77a04SHong Zhang 1210b2b77a04SHong Zhang fft->n = n; 12110c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1212e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 121326fbe8dcSKarl Rupp 1214c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr); 121526fbe8dcSKarl Rupp 1216b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1217c92418ccSAmlan Barua 1218b2b77a04SHong Zhang fftw->p_forward = 0; 1219b2b77a04SHong Zhang fftw->p_backward = 0; 1220b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1221b2b77a04SHong Zhang 1222b2b77a04SHong Zhang if (size == 1) { 1223b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1224b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1225b2b77a04SHong Zhang } else { 1226b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1227b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1228b2b77a04SHong Zhang } 1229b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1230b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12314a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 123226fbe8dcSKarl Rupp 12334a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 12344a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 12354a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1236b2b77a04SHong Zhang 1237b2b77a04SHong Zhang /* get runtime options */ 1238*ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1239b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 124026fbe8dcSKarl Rupp if (flg) fftw->p_flag = (unsigned)p_flag; 12414a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1242b2b77a04SHong Zhang PetscFunctionReturn(0); 1243b2b77a04SHong Zhang } 1244b2b77a04SHong Zhang EXTERN_C_END 12453c3a9c75SAmlan Barua 12463c3a9c75SAmlan Barua 12473c3a9c75SAmlan Barua 12483c3a9c75SAmlan Barua 1249