1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua Input parameter: 303564f093SHong Zhang A - the matrix 3197504071SAmlan Barua x - the vector on which FDFT will be performed 3297504071SAmlan Barua 3397504071SAmlan Barua Output parameter: 3497504071SAmlan Barua y - vector that stores result of FDFT 3597504071SAmlan Barua */ 36b2b77a04SHong Zhang #undef __FUNCT__ 37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39b2b77a04SHong Zhang { 40b2b77a04SHong Zhang PetscErrorCode ierr; 41b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 42b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 44b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 45b2b77a04SHong Zhang 46b2b77a04SHong Zhang PetscFunctionBegin; 47b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 50b2b77a04SHong Zhang switch (ndim) { 51b2b77a04SHong Zhang case 1: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 2: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang case 3: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang default: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang } 80b2b77a04SHong Zhang fftw->finarray = x_array; 81b2b77a04SHong Zhang fftw->foutarray = y_array; 82b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 85b2b77a04SHong Zhang } else { /* use existing plan */ 86b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88b2b77a04SHong Zhang } else { 89b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 90b2b77a04SHong Zhang } 91b2b77a04SHong Zhang } 92b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang PetscFunctionReturn(0); 95b2b77a04SHong Zhang } 96b2b77a04SHong Zhang 9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 9897504071SAmlan Barua Input parameter: 993564f093SHong Zhang A - the matrix 10097504071SAmlan Barua x - the vector on which BDFT will be performed 10197504071SAmlan Barua 10297504071SAmlan Barua Output parameter: 10397504071SAmlan Barua y - vector that stores result of BDFT 10497504071SAmlan Barua */ 10597504071SAmlan Barua 106b2b77a04SHong Zhang #undef __FUNCT__ 107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109b2b77a04SHong Zhang { 110b2b77a04SHong Zhang PetscErrorCode ierr; 111b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 112b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 114b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 115b2b77a04SHong Zhang 116b2b77a04SHong Zhang PetscFunctionBegin; 117b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 120b2b77a04SHong Zhang switch (ndim) { 121b2b77a04SHong Zhang case 1: 12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 123b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12458a912c5SAmlan Barua #else 125e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 12658a912c5SAmlan Barua #endif 127b2b77a04SHong Zhang break; 128b2b77a04SHong Zhang case 2: 12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 130b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13158a912c5SAmlan Barua #else 132e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 13358a912c5SAmlan Barua #endif 134b2b77a04SHong Zhang break; 135b2b77a04SHong Zhang case 3: 13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 137b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13858a912c5SAmlan Barua #else 139e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14058a912c5SAmlan Barua #endif 141b2b77a04SHong Zhang break; 142b2b77a04SHong Zhang default: 14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 144b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14558a912c5SAmlan Barua #else 146e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14758a912c5SAmlan Barua #endif 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang } 150b2b77a04SHong Zhang fftw->binarray = x_array; 151b2b77a04SHong Zhang fftw->boutarray = y_array; 152b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153b2b77a04SHong Zhang } else { /* use existing plan */ 154b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156b2b77a04SHong Zhang } else { 157b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158b2b77a04SHong Zhang } 159b2b77a04SHong Zhang } 160b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang PetscFunctionReturn(0); 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang 16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 16697504071SAmlan Barua Input parameter: 1673564f093SHong Zhang A - the matrix 16897504071SAmlan Barua x - the vector on which FDFT will be performed 16997504071SAmlan Barua 17097504071SAmlan Barua Output parameter: 17197504071SAmlan Barua y - vector that stores result of FDFT 17297504071SAmlan Barua */ 173b2b77a04SHong Zhang #undef __FUNCT__ 174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176b2b77a04SHong Zhang { 177b2b77a04SHong Zhang PetscErrorCode ierr; 178b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 179b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 181c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 182ce94432eSBarry Smith MPI_Comm comm; 183b2b77a04SHong Zhang 184b2b77a04SHong Zhang PetscFunctionBegin; 185ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 186b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 187b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 188b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 189b2b77a04SHong Zhang switch (ndim) { 190b2b77a04SHong Zhang case 1: 1913c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 193ae0a50aaSHong Zhang #else 1944f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 1953c3a9c75SAmlan Barua #endif 196b2b77a04SHong Zhang break; 197b2b77a04SHong Zhang case 2: 19897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 199b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2003c3a9c75SAmlan Barua #else 2013c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2023c3a9c75SAmlan Barua #endif 203b2b77a04SHong Zhang break; 204b2b77a04SHong Zhang case 3: 2053c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 206b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2073c3a9c75SAmlan Barua #else 20851d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2093c3a9c75SAmlan Barua #endif 210b2b77a04SHong Zhang break; 211b2b77a04SHong Zhang default: 2123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 213c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2143c3a9c75SAmlan Barua #else 2153c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2163c3a9c75SAmlan Barua #endif 217b2b77a04SHong Zhang break; 218b2b77a04SHong Zhang } 219b2b77a04SHong Zhang fftw->finarray = x_array; 220b2b77a04SHong Zhang fftw->foutarray = y_array; 221b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 222b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 223b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 224b2b77a04SHong Zhang } else { /* use existing plan */ 225b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 226b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 227b2b77a04SHong Zhang } else { 228b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 229b2b77a04SHong Zhang } 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 232b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 233b2b77a04SHong Zhang PetscFunctionReturn(0); 234b2b77a04SHong Zhang } 235b2b77a04SHong Zhang 23697504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 23797504071SAmlan Barua Input parameter: 2383564f093SHong Zhang A - the matrix 23997504071SAmlan Barua x - the vector on which BDFT will be performed 24097504071SAmlan Barua 24197504071SAmlan Barua Output parameter: 24297504071SAmlan Barua y - vector that stores result of BDFT 24397504071SAmlan Barua */ 244b2b77a04SHong Zhang #undef __FUNCT__ 245b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 246b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 247b2b77a04SHong Zhang { 248b2b77a04SHong Zhang PetscErrorCode ierr; 249b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 250b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 251b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 252c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 253ce94432eSBarry Smith MPI_Comm comm; 254b2b77a04SHong Zhang 255b2b77a04SHong Zhang PetscFunctionBegin; 256ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 257b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 258b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 259b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 260b2b77a04SHong Zhang switch (ndim) { 261b2b77a04SHong Zhang case 1: 2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 263b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 264ae0a50aaSHong Zhang #else 2654f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2663c3a9c75SAmlan Barua #endif 267b2b77a04SHong Zhang break; 268b2b77a04SHong Zhang case 2: 26997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 270b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2713c3a9c75SAmlan Barua #else 2723c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang case 3: 2763c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 277b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2783c3a9c75SAmlan Barua #else 2793c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2803c3a9c75SAmlan Barua #endif 281b2b77a04SHong Zhang break; 282b2b77a04SHong Zhang default: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2853c3a9c75SAmlan Barua #else 2863c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang } 290b2b77a04SHong Zhang fftw->binarray = x_array; 291b2b77a04SHong Zhang fftw->boutarray = y_array; 292b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 293b2b77a04SHong Zhang } else { /* use existing plan */ 294b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 295b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 296b2b77a04SHong Zhang } else { 297b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 298b2b77a04SHong Zhang } 299b2b77a04SHong Zhang } 300b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 301b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 302b2b77a04SHong Zhang PetscFunctionReturn(0); 303b2b77a04SHong Zhang } 304b2b77a04SHong Zhang 305b2b77a04SHong Zhang #undef __FUNCT__ 306b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 307b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 308b2b77a04SHong Zhang { 309b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 310b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 311b2b77a04SHong Zhang PetscErrorCode ierr; 312b2b77a04SHong Zhang 313b2b77a04SHong Zhang PetscFunctionBegin; 314b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 315b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 316b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 317bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3186ccf0b3eSHong Zhang fftw_mpi_cleanup(); 319b2b77a04SHong Zhang PetscFunctionReturn(0); 320b2b77a04SHong Zhang } 321b2b77a04SHong Zhang 322c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 323b2b77a04SHong Zhang #undef __FUNCT__ 324b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 325b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 326b2b77a04SHong Zhang { 327b2b77a04SHong Zhang PetscErrorCode ierr; 328b2b77a04SHong Zhang PetscScalar *array; 329b2b77a04SHong Zhang 330b2b77a04SHong Zhang PetscFunctionBegin; 331b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 332b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 333b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 334b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 335b2b77a04SHong Zhang PetscFunctionReturn(0); 336b2b77a04SHong Zhang } 337b2b77a04SHong Zhang 3384f7415efSAmlan Barua #undef __FUNCT__ 3394be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3404be45526SHong Zhang /*@ 341b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3424f7415efSAmlan Barua parallel layout determined by FFTW 3434f7415efSAmlan Barua 3444f7415efSAmlan Barua Collective on Mat 3454f7415efSAmlan Barua 3464f7415efSAmlan Barua Input Parameter: 3473564f093SHong Zhang . A - the matrix 3484f7415efSAmlan Barua 3494f7415efSAmlan Barua Output Parameter: 350cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 351cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 352cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3534f7415efSAmlan Barua 3544f7415efSAmlan Barua Level: advanced 3553564f093SHong Zhang 35697504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 35797504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 35897504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 35997504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 36097504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 36197504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 362e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 363e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 364e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 365e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 366e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 367e0ec536eSAmlan Barua each processor and returns that. 3684f7415efSAmlan Barua 3694f7415efSAmlan Barua .seealso: MatCreateFFTW() 3704be45526SHong Zhang @*/ 3714be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3724be45526SHong Zhang { 3734be45526SHong Zhang PetscErrorCode ierr; 374b4c3921fSHong Zhang 3754be45526SHong Zhang PetscFunctionBegin; 3764be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3774be45526SHong Zhang PetscFunctionReturn(0); 3784be45526SHong Zhang } 3794be45526SHong Zhang 3804be45526SHong Zhang #undef __FUNCT__ 3814be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3824be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3834f7415efSAmlan Barua { 3844f7415efSAmlan Barua PetscErrorCode ierr; 3854f7415efSAmlan Barua PetscMPIInt size,rank; 386ce94432eSBarry Smith MPI_Comm comm; 3874f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3884f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3899496c9aeSAmlan Barua PetscInt N = fft->N; 3904f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 3914f7415efSAmlan Barua 3924f7415efSAmlan Barua PetscFunctionBegin; 3934f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3944f7415efSAmlan Barua PetscValidType(A,1); 395ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 3964f7415efSAmlan Barua 3974f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 3984f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 3994f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4004f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4014f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4024f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4034f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4044f7415efSAmlan Barua #else 4058ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4068ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4078ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4084f7415efSAmlan Barua #endif 40997504071SAmlan Barua } else { /* parallel cases */ 4104f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4119496c9aeSAmlan Barua ptrdiff_t local_n1; 4129496c9aeSAmlan Barua fftw_complex *data_fout; 4139496c9aeSAmlan Barua ptrdiff_t local_1_start; 4149496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4159496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4169496c9aeSAmlan Barua #else 4174f7415efSAmlan Barua double *data_finr,*data_boutr; 4186ccf0b3eSHong Zhang PetscInt n1,N1; 4199496c9aeSAmlan Barua ptrdiff_t temp; 4209496c9aeSAmlan Barua #endif 4219496c9aeSAmlan Barua 4224f7415efSAmlan Barua switch (ndim) { 4234f7415efSAmlan Barua case 1: 42457625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4254f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42657625b84SAmlan Barua #else 42757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42857625b84SAmlan Barua if (fin) { 42957625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 430778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43126fbe8dcSKarl Rupp 43257625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43357625b84SAmlan Barua } 43457625b84SAmlan Barua if (fout) { 43557625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43726fbe8dcSKarl Rupp 43857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 43957625b84SAmlan Barua } 44057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44157625b84SAmlan Barua if (bout) { 44257625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 443778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44426fbe8dcSKarl Rupp 44557625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44657625b84SAmlan Barua } 44757625b84SAmlan Barua break; 44857625b84SAmlan Barua #endif 4494f7415efSAmlan Barua case 2: 45097504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4514f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4524f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4534f7415efSAmlan Barua if (fin) { 4544f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 455778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 45626fbe8dcSKarl Rupp 4574f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4584f7415efSAmlan Barua } 4594f7415efSAmlan Barua if (bout) { 4604f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 461778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 46226fbe8dcSKarl Rupp 4634f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4644f7415efSAmlan Barua } 46557625b84SAmlan Barua if (fout) { 46657625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 467778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46826fbe8dcSKarl Rupp 46957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 47057625b84SAmlan Barua } 4714f7415efSAmlan Barua #else 4724f7415efSAmlan Barua /* Get local size */ 4734f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4744f7415efSAmlan Barua if (fin) { 4754f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 47726fbe8dcSKarl Rupp 4784f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4794f7415efSAmlan Barua } 4804f7415efSAmlan Barua if (fout) { 4814f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 482778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48326fbe8dcSKarl Rupp 4844f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4854f7415efSAmlan Barua } 4864f7415efSAmlan Barua if (bout) { 4874f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 488778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 48926fbe8dcSKarl Rupp 4904f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4914f7415efSAmlan Barua } 4924f7415efSAmlan Barua #endif 4934f7415efSAmlan Barua break; 4944f7415efSAmlan Barua case 3: 4954f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4964f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4974f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4984f7415efSAmlan Barua if (fin) { 4994f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 500778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50126fbe8dcSKarl Rupp 5024f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5034f7415efSAmlan Barua } 5044f7415efSAmlan Barua if (bout) { 5054f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 506778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 50726fbe8dcSKarl Rupp 5084f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5094f7415efSAmlan Barua } 5103564f093SHong Zhang 51157625b84SAmlan Barua if (fout) { 51257625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 513778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51426fbe8dcSKarl Rupp 51557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51657625b84SAmlan Barua } 5174f7415efSAmlan Barua #else 5180c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5190c9b18e4SAmlan Barua if (fin) { 5200c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52226fbe8dcSKarl Rupp 5230c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5240c9b18e4SAmlan Barua } 5250c9b18e4SAmlan Barua if (fout) { 5260c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 527778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52826fbe8dcSKarl Rupp 5290c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5300c9b18e4SAmlan Barua } 5310c9b18e4SAmlan Barua if (bout) { 5320c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53426fbe8dcSKarl Rupp 5350c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5360c9b18e4SAmlan Barua } 5374f7415efSAmlan Barua #endif 5384f7415efSAmlan Barua break; 5394f7415efSAmlan Barua default: 5404f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5414f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 54226fbe8dcSKarl Rupp 5434f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 54426fbe8dcSKarl Rupp 5454f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5464f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 54726fbe8dcSKarl Rupp 5484f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5494f7415efSAmlan Barua 5504f7415efSAmlan Barua if (fin) { 5514f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 552778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 55326fbe8dcSKarl Rupp 5544f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5554f7415efSAmlan Barua } 5564f7415efSAmlan Barua if (bout) { 5574f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 558778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 55926fbe8dcSKarl Rupp 5609496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5614f7415efSAmlan Barua } 5623564f093SHong Zhang 56357625b84SAmlan Barua if (fout) { 56457625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56626fbe8dcSKarl Rupp 56757625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56857625b84SAmlan Barua } 5694f7415efSAmlan Barua #else 5700c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5710c9b18e4SAmlan Barua if (fin) { 5720c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 573778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 57426fbe8dcSKarl Rupp 5750c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5760c9b18e4SAmlan Barua } 5770c9b18e4SAmlan Barua if (fout) { 5780c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 579778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58026fbe8dcSKarl Rupp 5810c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5820c9b18e4SAmlan Barua } 5830c9b18e4SAmlan Barua if (bout) { 5840c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 585778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 58626fbe8dcSKarl Rupp 5870c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5880c9b18e4SAmlan Barua } 5894f7415efSAmlan Barua #endif 5904f7415efSAmlan Barua break; 5914f7415efSAmlan Barua } 5924f7415efSAmlan Barua } 5934f7415efSAmlan Barua PetscFunctionReturn(0); 5944f7415efSAmlan Barua } 5954f7415efSAmlan Barua 596c92418ccSAmlan Barua #undef __FUNCT__ 59774a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 5983564f093SHong Zhang /*@ 5993564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 60054efbe56SHong Zhang 6013564f093SHong Zhang Collective on Mat 6023564f093SHong Zhang 6033564f093SHong Zhang Input Parameters: 6043564f093SHong Zhang + A - FFTW matrix 6053564f093SHong Zhang - x - the PETSc vector 6063564f093SHong Zhang 6073564f093SHong Zhang Output Parameters: 6083564f093SHong Zhang . y - the FFTW vector 6093564f093SHong Zhang 610b2b77a04SHong Zhang Options Database Keys: 6113564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 612b2b77a04SHong Zhang 613b2b77a04SHong Zhang Level: intermediate 6143564f093SHong Zhang 61597504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 61697504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 61797504071SAmlan Barua zeros. This routine does that job by scattering operation. 61897504071SAmlan Barua 6193564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6203564f093SHong Zhang @*/ 6213564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6223564f093SHong Zhang { 6233564f093SHong Zhang PetscErrorCode ierr; 624b2b77a04SHong Zhang 6253564f093SHong Zhang PetscFunctionBegin; 6263564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6273564f093SHong Zhang PetscFunctionReturn(0); 6283564f093SHong Zhang } 6293c3a9c75SAmlan Barua 6303c3a9c75SAmlan Barua #undef __FUNCT__ 6311986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 63274a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6333c3a9c75SAmlan Barua { 6343c3a9c75SAmlan Barua PetscErrorCode ierr; 635ce94432eSBarry Smith MPI_Comm comm; 6363c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6373c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6389496c9aeSAmlan Barua PetscInt N =fft->N; 639b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6409496c9aeSAmlan Barua PetscInt low; 6418ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6423c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6439496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6443c3a9c75SAmlan Barua VecScatter vecscat; 6453c3a9c75SAmlan Barua IS list1,list2; 6469496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6479496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6489496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6499496c9aeSAmlan Barua PetscInt N1, n1,NM; 6509496c9aeSAmlan Barua ptrdiff_t temp; 6519496c9aeSAmlan Barua #endif 6523c3a9c75SAmlan Barua 6533564f093SHong Zhang PetscFunctionBegin; 654ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 655f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 656f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6570298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 6583c3a9c75SAmlan Barua 6593564f093SHong Zhang if (size==1) { 6608ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6618ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6629496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6636971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6646971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6656971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6666971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 667b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6683564f093SHong Zhang } else { 6693c3a9c75SAmlan Barua switch (ndim) { 6703c3a9c75SAmlan Barua case 1: 67164657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67264657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 67326fbe8dcSKarl Rupp 6744f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6754f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 67664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 67764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 67864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 67964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 68064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 68164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 68264657d84SAmlan Barua #else 683e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 68464657d84SAmlan Barua #endif 6853c3a9c75SAmlan Barua break; 6863c3a9c75SAmlan Barua case 2: 687bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 688bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 68926fbe8dcSKarl Rupp 6904f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 6914f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 692bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 693bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 694bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 696bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 697bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 698bd59e6c6SAmlan Barua #else 6993c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 70026fbe8dcSKarl Rupp 7013c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7029496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7039496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7043c3a9c75SAmlan Barua 7053564f093SHong Zhang if (dim[1]%2==0) { 7063c3a9c75SAmlan Barua NM = dim[1]+2; 7073564f093SHong Zhang } else { 7083c3a9c75SAmlan Barua NM = dim[1]+1; 7093564f093SHong Zhang } 7103c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7113c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7123c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7133c3a9c75SAmlan Barua tempindx1 = i*NM + j; 71426fbe8dcSKarl Rupp 7155b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7163c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7173c3a9c75SAmlan Barua } 7183c3a9c75SAmlan Barua } 7193c3a9c75SAmlan Barua 7203c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7213c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7223c3a9c75SAmlan Barua 723f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 724f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 727b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 728b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 729b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 730b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 731bd59e6c6SAmlan Barua #endif 7329496c9aeSAmlan Barua break; 7333c3a9c75SAmlan Barua 7343c3a9c75SAmlan Barua case 3: 735bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 736bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 73726fbe8dcSKarl Rupp 7384f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7394f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 740bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 741bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 744bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 745bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 746bd59e6c6SAmlan Barua #else 74751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 74826fbe8dcSKarl Rupp 74951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 75051d1eed7SAmlan Barua 7519496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 75351d1eed7SAmlan Barua 754db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 755db4deed7SKarl Rupp else NM = dim[2]+1; 75651d1eed7SAmlan Barua 75751d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 75851d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 75951d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 76051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 76151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 76226fbe8dcSKarl Rupp 76351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 76451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 76551d1eed7SAmlan Barua } 76651d1eed7SAmlan Barua } 76751d1eed7SAmlan Barua } 76851d1eed7SAmlan Barua 76951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 77051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 77151d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 77251d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77351d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77451d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 775b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 776b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 777b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 778b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 779bd59e6c6SAmlan Barua #endif 7809496c9aeSAmlan Barua break; 7813c3a9c75SAmlan Barua 7823c3a9c75SAmlan Barua default: 783bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 784bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78526fbe8dcSKarl Rupp 7864f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 7874f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 788bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 789bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 792bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 793bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua #else 795e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 79626fbe8dcSKarl Rupp 797e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 79826fbe8dcSKarl Rupp 799e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80026fbe8dcSKarl Rupp 801e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 80226fbe8dcSKarl Rupp 803e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 804e5d7f247SAmlan Barua 805e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 806e5d7f247SAmlan Barua 807e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 808e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 809e5d7f247SAmlan Barua 810db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 811db4deed7SKarl Rupp else NM = 1; 812e5d7f247SAmlan Barua 8136971673cSAmlan Barua j = low; 8143564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8156971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8166971673cSAmlan Barua indx2[i] = j; 81726fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8186971673cSAmlan Barua j++; 8196971673cSAmlan Barua } 8206971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8216971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8226971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8236971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8246971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8256971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 826b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 827b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 828b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 829b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 830bd59e6c6SAmlan Barua #endif 8319496c9aeSAmlan Barua break; 8323c3a9c75SAmlan Barua } 833e81bb599SAmlan Barua } 8343564f093SHong Zhang PetscFunctionReturn(0); 8353c3a9c75SAmlan Barua } 8363c3a9c75SAmlan Barua 8373c3a9c75SAmlan Barua #undef __FUNCT__ 83874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8393564f093SHong Zhang /*@ 8403564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8413564f093SHong Zhang 8423564f093SHong Zhang Collective on Mat 8433564f093SHong Zhang 8443564f093SHong Zhang Input Parameters: 8453564f093SHong Zhang + A - FFTW matrix 8463564f093SHong Zhang - x - FFTW vector 8473564f093SHong Zhang 8483564f093SHong Zhang Output Parameters: 8493564f093SHong Zhang . y - PETSc vector 8503564f093SHong Zhang 8513564f093SHong Zhang Level: intermediate 8523564f093SHong Zhang 8533564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8543564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8553564f093SHong Zhang 8563564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8573564f093SHong Zhang @*/ 85874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8593c3a9c75SAmlan Barua { 8603c3a9c75SAmlan Barua PetscErrorCode ierr; 8615fd66863SKarl Rupp 8623c3a9c75SAmlan Barua PetscFunctionBegin; 86374a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8643c3a9c75SAmlan Barua PetscFunctionReturn(0); 8653c3a9c75SAmlan Barua } 86654efbe56SHong Zhang 8673c3a9c75SAmlan Barua #undef __FUNCT__ 86874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 86974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8705b3e41ffSAmlan Barua { 8715b3e41ffSAmlan Barua PetscErrorCode ierr; 872ce94432eSBarry Smith MPI_Comm comm; 8735b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8745b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8759496c9aeSAmlan Barua PetscInt N = fft->N; 876b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 8779496c9aeSAmlan Barua PetscInt low; 8789496c9aeSAmlan Barua PetscInt rank,size; 8795b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8809496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8815b3e41ffSAmlan Barua VecScatter vecscat; 8825b3e41ffSAmlan Barua IS list1,list2; 8839496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8849496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8859496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8869496c9aeSAmlan Barua PetscInt N1, n1,NM; 8879496c9aeSAmlan Barua ptrdiff_t temp; 8889496c9aeSAmlan Barua #endif 8899496c9aeSAmlan Barua 8903564f093SHong Zhang PetscFunctionBegin; 891ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 8925b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8935b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8940298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 8955b3e41ffSAmlan Barua 896e81bb599SAmlan Barua if (size==1) { 897b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 8986971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8996971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9006971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9016971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 902b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 903e81bb599SAmlan Barua 9043564f093SHong Zhang } else { 905e81bb599SAmlan Barua switch (ndim) { 906e81bb599SAmlan Barua case 1: 90764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 90864657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 90926fbe8dcSKarl Rupp 9104f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9114f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 91264657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 91364657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 91464657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 91564657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 91664657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 91764657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 91864657d84SAmlan Barua #else 9196a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 92064657d84SAmlan Barua #endif 9215b3e41ffSAmlan Barua break; 9225b3e41ffSAmlan Barua case 2: 923bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 924bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 92526fbe8dcSKarl Rupp 9264f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9274f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 928bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 929bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 932bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 933bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 934bd59e6c6SAmlan Barua #else 9355b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 93626fbe8dcSKarl Rupp 9375b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9385b3e41ffSAmlan Barua 9399496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9409496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9415b3e41ffSAmlan Barua 942db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 943db4deed7SKarl Rupp else NM = dim[1]+1; 9445b3e41ffSAmlan Barua 9455b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9465b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9475b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9485b3e41ffSAmlan Barua tempindx1 = i*NM + j; 94926fbe8dcSKarl Rupp 9505b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9515b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9525b3e41ffSAmlan Barua } 9535b3e41ffSAmlan Barua } 9545b3e41ffSAmlan Barua 9555b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9565b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9575b3e41ffSAmlan Barua 9585b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9595b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9605b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9615b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 962b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 963b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 964b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 965b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 966bd59e6c6SAmlan Barua #endif 9679496c9aeSAmlan Barua break; 9685b3e41ffSAmlan Barua case 3: 969bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 970bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 97126fbe8dcSKarl Rupp 9724f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9734f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 974bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 975bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 978bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 979bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 980bd59e6c6SAmlan Barua #else 981bd59e6c6SAmlan Barua 98251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98326fbe8dcSKarl Rupp 98451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 98551d1eed7SAmlan Barua 9869496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9879496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 98851d1eed7SAmlan Barua 989db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 990db4deed7SKarl Rupp else NM = dim[2]+1; 99151d1eed7SAmlan Barua 99251d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 99351d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 99451d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 99551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 99651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 99726fbe8dcSKarl Rupp 99851d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 99951d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 100051d1eed7SAmlan Barua } 100151d1eed7SAmlan Barua } 100251d1eed7SAmlan Barua } 100351d1eed7SAmlan Barua 100451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 100551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 100651d1eed7SAmlan Barua 100751d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 100851d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 100951d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101051d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1011b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1012b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1013b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1014b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1015bd59e6c6SAmlan Barua #endif 10169496c9aeSAmlan Barua break; 10175b3e41ffSAmlan Barua default: 1018bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1019bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 102026fbe8dcSKarl Rupp 10214f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10224f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1023bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1024bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua #else 1030ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 103126fbe8dcSKarl Rupp 1032ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 103326fbe8dcSKarl Rupp 1034ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103526fbe8dcSKarl Rupp 1036ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 103726fbe8dcSKarl Rupp 1038ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1039ba6e06d0SAmlan Barua 1040ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1041ba6e06d0SAmlan Barua 1042ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1043ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1044ba6e06d0SAmlan Barua 1045db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1046db4deed7SKarl Rupp else NM = 1; 1047ba6e06d0SAmlan Barua 1048ba6e06d0SAmlan Barua j = low; 10493564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1050ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1051ba6e06d0SAmlan Barua indx2[i] = j; 10523564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1053ba6e06d0SAmlan Barua j++; 1054ba6e06d0SAmlan Barua } 1055ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1056ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1057ba6e06d0SAmlan Barua 1058ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1059ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1062b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1063b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1064b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1065b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1066bd59e6c6SAmlan Barua #endif 10679496c9aeSAmlan Barua break; 10685b3e41ffSAmlan Barua } 1069e81bb599SAmlan Barua } 10703564f093SHong Zhang PetscFunctionReturn(0); 10715b3e41ffSAmlan Barua } 10725b3e41ffSAmlan Barua 10735b3e41ffSAmlan Barua #undef __FUNCT__ 10743c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10753c3a9c75SAmlan Barua /* 10763564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10773564f093SHong Zhang 10783c3a9c75SAmlan Barua Options Database Keys: 10793c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10803c3a9c75SAmlan Barua 10813c3a9c75SAmlan Barua Level: intermediate 10823c3a9c75SAmlan Barua 10833c3a9c75SAmlan Barua */ 1084*8cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1085b2b77a04SHong Zhang { 1086b2b77a04SHong Zhang PetscErrorCode ierr; 1087ce94432eSBarry Smith MPI_Comm comm; 1088b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1089b2b77a04SHong Zhang Mat_FFTW *fftw; 1090b2b77a04SHong Zhang PetscInt n =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1091b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1092b2b77a04SHong Zhang PetscBool flg; 10934f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 109411902ff2SHong Zhang PetscMPIInt size,rank; 10959496c9aeSAmlan Barua ptrdiff_t *pdim; 10969496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10979496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10989496c9aeSAmlan Barua ptrdiff_t temp; 10998ca4c034SAmlan Barua PetscInt N1,tot_dim; 11004f48bc67SAmlan Barua #else 11014f48bc67SAmlan Barua PetscInt n1; 11029496c9aeSAmlan Barua #endif 11039496c9aeSAmlan Barua 1104b2b77a04SHong Zhang PetscFunctionBegin; 1105ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1106b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 110711902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1108c92418ccSAmlan Barua 11091878d478SAmlan Barua fftw_mpi_init(); 111011902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 111111902ff2SHong Zhang pdim[0] = dim[0]; 11128ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11138ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11148ca4c034SAmlan Barua #endif 11153564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11166882372aSHong Zhang partial_dim *= dim[ctr]; 111711902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11188ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1119db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1120db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11218ca4c034SAmlan Barua #endif 11226882372aSHong Zhang } 11233c3a9c75SAmlan Barua 1124b2b77a04SHong Zhang if (size == 1) { 1125e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1126b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1127b2b77a04SHong Zhang n = N; 1128e81bb599SAmlan Barua #else 1129e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1130e81bb599SAmlan Barua n = tot_dim; 1131e81bb599SAmlan Barua #endif 1132e81bb599SAmlan Barua 1133b2b77a04SHong Zhang } else { 11341abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1135b2b77a04SHong Zhang switch (ndim) { 1136b2b77a04SHong Zhang case 1: 11373c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11383c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1139e5d7f247SAmlan Barua #else 11409496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 114126fbe8dcSKarl Rupp 11426882372aSHong Zhang n = (PetscInt)local_n0; 11439496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11449496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1145e5d7f247SAmlan Barua #endif 1146b2b77a04SHong Zhang break; 1147b2b77a04SHong Zhang case 2: 11485b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1149b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1150b2b77a04SHong Zhang /* 1151b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1152b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1153b2b77a04SHong Zhang */ 1154b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1155b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11565b3e41ffSAmlan Barua #else 11574f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 115826fbe8dcSKarl Rupp 11595b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1160c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11615b3e41ffSAmlan Barua #endif 1162b2b77a04SHong Zhang break; 1163b2b77a04SHong Zhang case 3: 116451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 116551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 116626fbe8dcSKarl Rupp 11676882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11686882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 116951d1eed7SAmlan Barua #else 11704f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 117126fbe8dcSKarl Rupp 117251d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1173c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 117451d1eed7SAmlan Barua #endif 1175b2b77a04SHong Zhang break; 1176b2b77a04SHong Zhang default: 1177b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 117811902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 117926fbe8dcSKarl Rupp 11806882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11816882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1182b3a17365SAmlan Barua #else 1183b3a17365SAmlan Barua temp = pdim[ndim-1]; 118426fbe8dcSKarl Rupp 1185b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 118626fbe8dcSKarl Rupp 11874f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 118826fbe8dcSKarl Rupp 1189b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1190b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 119126fbe8dcSKarl Rupp 1192b3a17365SAmlan Barua pdim[ndim-1] = temp; 119326fbe8dcSKarl Rupp 1194c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1195b3a17365SAmlan Barua #endif 1196b2b77a04SHong Zhang break; 1197b2b77a04SHong Zhang } 1198b2b77a04SHong Zhang } 1199b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1200b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1201b2b77a04SHong Zhang fft->data = (void*)fftw; 1202b2b77a04SHong Zhang 1203b2b77a04SHong Zhang fft->n = n; 12040c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1205e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 120626fbe8dcSKarl Rupp 1207c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr); 120826fbe8dcSKarl Rupp 1209b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1210c92418ccSAmlan Barua 1211b2b77a04SHong Zhang fftw->p_forward = 0; 1212b2b77a04SHong Zhang fftw->p_backward = 0; 1213b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1214b2b77a04SHong Zhang 1215b2b77a04SHong Zhang if (size == 1) { 1216b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1217b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1218b2b77a04SHong Zhang } else { 1219b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1220b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1221b2b77a04SHong Zhang } 1222b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1223b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12244a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 122526fbe8dcSKarl Rupp 122600de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 122700de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 122800de8ff0SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1229b2b77a04SHong Zhang 1230b2b77a04SHong Zhang /* get runtime options */ 1231ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1232b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 123326fbe8dcSKarl Rupp if (flg) fftw->p_flag = (unsigned)p_flag; 12344a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1235b2b77a04SHong Zhang PetscFunctionReturn(0); 1236b2b77a04SHong Zhang } 12373c3a9c75SAmlan Barua 12383c3a9c75SAmlan Barua 12393c3a9c75SAmlan Barua 12403c3a9c75SAmlan Barua 1241