1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua Input parameter: 303564f093SHong Zhang A - the matrix 3197504071SAmlan Barua x - the vector on which FDFT will be performed 3297504071SAmlan Barua 3397504071SAmlan Barua Output parameter: 3497504071SAmlan Barua y - vector that stores result of FDFT 3597504071SAmlan Barua */ 36b2b77a04SHong Zhang #undef __FUNCT__ 37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39b2b77a04SHong Zhang { 40b2b77a04SHong Zhang PetscErrorCode ierr; 41b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 42b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 44b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 45b2b77a04SHong Zhang 46b2b77a04SHong Zhang PetscFunctionBegin; 47b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 50b2b77a04SHong Zhang switch (ndim) { 51b2b77a04SHong Zhang case 1: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 2: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang case 3: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang default: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang } 80b2b77a04SHong Zhang fftw->finarray = x_array; 81b2b77a04SHong Zhang fftw->foutarray = y_array; 82b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 85b2b77a04SHong Zhang } else { /* use existing plan */ 86b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88b2b77a04SHong Zhang } else { 89b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 90b2b77a04SHong Zhang } 91b2b77a04SHong Zhang } 92b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang PetscFunctionReturn(0); 95b2b77a04SHong Zhang } 96b2b77a04SHong Zhang 9797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 9897504071SAmlan Barua Input parameter: 993564f093SHong Zhang A - the matrix 10097504071SAmlan Barua x - the vector on which BDFT will be performed 10197504071SAmlan Barua 10297504071SAmlan Barua Output parameter: 10397504071SAmlan Barua y - vector that stores result of BDFT 10497504071SAmlan Barua */ 10597504071SAmlan Barua 106b2b77a04SHong Zhang #undef __FUNCT__ 107b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109b2b77a04SHong Zhang { 110b2b77a04SHong Zhang PetscErrorCode ierr; 111b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 112b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 114b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 115b2b77a04SHong Zhang 116b2b77a04SHong Zhang PetscFunctionBegin; 117b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 120b2b77a04SHong Zhang switch (ndim) { 121b2b77a04SHong Zhang case 1: 12258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 123b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12458a912c5SAmlan Barua #else 125e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12658a912c5SAmlan Barua #endif 127b2b77a04SHong Zhang break; 128b2b77a04SHong Zhang case 2: 12958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 130b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13158a912c5SAmlan Barua #else 132e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13358a912c5SAmlan Barua #endif 134b2b77a04SHong Zhang break; 135b2b77a04SHong Zhang case 3: 13658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 137b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13858a912c5SAmlan Barua #else 139e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 14058a912c5SAmlan Barua #endif 141b2b77a04SHong Zhang break; 142b2b77a04SHong Zhang default: 14358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 144b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14558a912c5SAmlan Barua #else 146e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 14758a912c5SAmlan Barua #endif 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang } 150b2b77a04SHong Zhang fftw->binarray = x_array; 151b2b77a04SHong Zhang fftw->boutarray = y_array; 152b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153b2b77a04SHong Zhang } else { /* use existing plan */ 154b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156b2b77a04SHong Zhang } else { 157b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158b2b77a04SHong Zhang } 159b2b77a04SHong Zhang } 160b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang PetscFunctionReturn(0); 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang 16597504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 16697504071SAmlan Barua Input parameter: 1673564f093SHong Zhang A - the matrix 16897504071SAmlan Barua x - the vector on which FDFT will be performed 16997504071SAmlan Barua 17097504071SAmlan Barua Output parameter: 17197504071SAmlan Barua y - vector that stores result of FDFT 17297504071SAmlan Barua */ 173b2b77a04SHong Zhang #undef __FUNCT__ 174b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 175b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176b2b77a04SHong Zhang { 177b2b77a04SHong Zhang PetscErrorCode ierr; 178b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 179b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 181c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 182b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 183b2b77a04SHong Zhang 184b2b77a04SHong Zhang PetscFunctionBegin; 185b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 186b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 187b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 188b2b77a04SHong Zhang switch (ndim) { 189b2b77a04SHong Zhang case 1: 1903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 191b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 192ae0a50aaSHong Zhang #else 1934f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 1943c3a9c75SAmlan Barua #endif 195b2b77a04SHong Zhang break; 196b2b77a04SHong Zhang case 2: 19797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 198b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1993c3a9c75SAmlan Barua #else 2003c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2013c3a9c75SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang case 3: 2043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 205b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2063c3a9c75SAmlan Barua #else 20751d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2083c3a9c75SAmlan Barua #endif 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang default: 2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 212c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2133c3a9c75SAmlan Barua #else 2143c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2153c3a9c75SAmlan Barua #endif 216b2b77a04SHong Zhang break; 217b2b77a04SHong Zhang } 218b2b77a04SHong Zhang fftw->finarray = x_array; 219b2b77a04SHong Zhang fftw->foutarray = y_array; 220b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 221b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 222b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 223b2b77a04SHong Zhang } else { /* use existing plan */ 224b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 225b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 226b2b77a04SHong Zhang } else { 227b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 228b2b77a04SHong Zhang } 229b2b77a04SHong Zhang } 230b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 231b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 232b2b77a04SHong Zhang PetscFunctionReturn(0); 233b2b77a04SHong Zhang } 234b2b77a04SHong Zhang 23597504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 23697504071SAmlan Barua Input parameter: 2373564f093SHong Zhang A - the matrix 23897504071SAmlan Barua x - the vector on which BDFT will be performed 23997504071SAmlan Barua 24097504071SAmlan Barua Output parameter: 24197504071SAmlan Barua y - vector that stores result of BDFT 24297504071SAmlan Barua */ 243b2b77a04SHong Zhang #undef __FUNCT__ 244b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 245b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 246b2b77a04SHong Zhang { 247b2b77a04SHong Zhang PetscErrorCode ierr; 248b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 249b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 250b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 251c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 252b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 253b2b77a04SHong Zhang 254b2b77a04SHong Zhang PetscFunctionBegin; 255b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 256b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 257b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 258b2b77a04SHong Zhang switch (ndim) { 259b2b77a04SHong Zhang case 1: 2603c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 261b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 262ae0a50aaSHong Zhang #else 2634f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2643c3a9c75SAmlan Barua #endif 265b2b77a04SHong Zhang break; 266b2b77a04SHong Zhang case 2: 26797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 268b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2693c3a9c75SAmlan Barua #else 2703c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2713c3a9c75SAmlan Barua #endif 272b2b77a04SHong Zhang break; 273b2b77a04SHong Zhang case 3: 2743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 275b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2763c3a9c75SAmlan Barua #else 2773c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2783c3a9c75SAmlan Barua #endif 279b2b77a04SHong Zhang break; 280b2b77a04SHong Zhang default: 2813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 282c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2833c3a9c75SAmlan Barua #else 2843c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2853c3a9c75SAmlan Barua #endif 286b2b77a04SHong Zhang break; 287b2b77a04SHong Zhang } 288b2b77a04SHong Zhang fftw->binarray = x_array; 289b2b77a04SHong Zhang fftw->boutarray = y_array; 290b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 291b2b77a04SHong Zhang } else { /* use existing plan */ 292b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 293b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 294b2b77a04SHong Zhang } else { 295b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296b2b77a04SHong Zhang } 297b2b77a04SHong Zhang } 298b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 304b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 305b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 306b2b77a04SHong Zhang { 307b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 308b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 309b2b77a04SHong Zhang PetscErrorCode ierr; 310b2b77a04SHong Zhang 311b2b77a04SHong Zhang PetscFunctionBegin; 312b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 313b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 314b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 315bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3166ccf0b3eSHong Zhang fftw_mpi_cleanup(); 317b2b77a04SHong Zhang PetscFunctionReturn(0); 318b2b77a04SHong Zhang } 319b2b77a04SHong Zhang 320c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 321b2b77a04SHong Zhang #undef __FUNCT__ 322b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 323b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 324b2b77a04SHong Zhang { 325b2b77a04SHong Zhang PetscErrorCode ierr; 326b2b77a04SHong Zhang PetscScalar *array; 327b2b77a04SHong Zhang 328b2b77a04SHong Zhang PetscFunctionBegin; 329b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 330b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 331b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 332b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 333b2b77a04SHong Zhang PetscFunctionReturn(0); 334b2b77a04SHong Zhang } 335b2b77a04SHong Zhang 3364f7415efSAmlan Barua #undef __FUNCT__ 3374be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3384be45526SHong Zhang /*@ 339b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3404f7415efSAmlan Barua parallel layout determined by FFTW 3414f7415efSAmlan Barua 3424f7415efSAmlan Barua Collective on Mat 3434f7415efSAmlan Barua 3444f7415efSAmlan Barua Input Parameter: 3453564f093SHong Zhang . A - the matrix 3464f7415efSAmlan Barua 3474f7415efSAmlan Barua Output Parameter: 348cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 349cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 350cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3514f7415efSAmlan Barua 3524f7415efSAmlan Barua Level: advanced 3533564f093SHong Zhang 35497504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 35597504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 35697504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 35797504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 35897504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 35997504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 360e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 361e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 362e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 363e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 364e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 365e0ec536eSAmlan Barua each processor and returns that. 3664f7415efSAmlan Barua 3674f7415efSAmlan Barua .seealso: MatCreateFFTW() 3684be45526SHong Zhang @*/ 3694be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3704be45526SHong Zhang { 3714be45526SHong Zhang PetscErrorCode ierr; 372b4c3921fSHong Zhang 3734be45526SHong Zhang PetscFunctionBegin; 3744be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3754be45526SHong Zhang PetscFunctionReturn(0); 3764be45526SHong Zhang } 3774be45526SHong Zhang 3784f7415efSAmlan Barua EXTERN_C_BEGIN 3794be45526SHong Zhang #undef __FUNCT__ 3804be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3814be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3824f7415efSAmlan Barua { 3834f7415efSAmlan Barua PetscErrorCode ierr; 3844f7415efSAmlan Barua PetscMPIInt size,rank; 3854f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3864f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3874f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3889496c9aeSAmlan Barua PetscInt N=fft->N; 3894f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3904f7415efSAmlan Barua 3914f7415efSAmlan Barua PetscFunctionBegin; 3924f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3934f7415efSAmlan Barua PetscValidType(A,1); 3944f7415efSAmlan Barua 3954f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 3964f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 3974f7415efSAmlan Barua if (size == 1) { /* sequential case */ 3984f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 3994f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4004f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4014f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4024f7415efSAmlan Barua #else 4038ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4048ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4058ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4064f7415efSAmlan Barua #endif 40797504071SAmlan Barua } else { /* parallel cases */ 4084f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4099496c9aeSAmlan Barua ptrdiff_t local_n1; 4109496c9aeSAmlan Barua fftw_complex *data_fout; 4119496c9aeSAmlan Barua ptrdiff_t local_1_start; 4129496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4139496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4149496c9aeSAmlan Barua #else 4154f7415efSAmlan Barua double *data_finr,*data_boutr; 4166ccf0b3eSHong Zhang PetscInt n1,N1; 4179496c9aeSAmlan Barua ptrdiff_t temp; 4189496c9aeSAmlan Barua #endif 4199496c9aeSAmlan Barua 4204f7415efSAmlan Barua switch (ndim) { 4214f7415efSAmlan Barua case 1: 42257625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4234f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42457625b84SAmlan Barua #else 42557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42657625b84SAmlan Barua if (fin) { 42757625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 428778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 42957625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43057625b84SAmlan Barua } 43157625b84SAmlan Barua if (fout) { 43257625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 433778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 43557625b84SAmlan Barua } 43657625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 43757625b84SAmlan Barua if (bout) { 43857625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 439778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44057625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44157625b84SAmlan Barua } 44257625b84SAmlan Barua break; 44357625b84SAmlan Barua #endif 4444f7415efSAmlan Barua case 2: 44597504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4464f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4474f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4484f7415efSAmlan Barua if (fin) { 4494f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 450778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4514f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4524f7415efSAmlan Barua } 4534f7415efSAmlan Barua if (bout) { 4544f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 455778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4564f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4574f7415efSAmlan Barua } 45857625b84SAmlan Barua if (fout) { 45957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 460778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 46257625b84SAmlan Barua } 4634f7415efSAmlan Barua #else 4644f7415efSAmlan Barua /* Get local size */ 4654f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4664f7415efSAmlan Barua if (fin) { 4674f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4694f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4704f7415efSAmlan Barua } 4714f7415efSAmlan Barua if (fout) { 4724f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4744f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4754f7415efSAmlan Barua } 4764f7415efSAmlan Barua if (bout) { 4774f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua #endif 4824f7415efSAmlan Barua break; 4834f7415efSAmlan Barua case 3: 4844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4854f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4864f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4874f7415efSAmlan Barua if (fin) { 4884f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 489778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4904f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4914f7415efSAmlan Barua } 4924f7415efSAmlan Barua if (bout) { 4934f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4954f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4964f7415efSAmlan Barua } 4973564f093SHong Zhang 49857625b84SAmlan Barua if (fout) { 49957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 500778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 50157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50257625b84SAmlan Barua } 5034f7415efSAmlan Barua #else 5040c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5050c9b18e4SAmlan Barua if (fin) { 5060c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 507778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5080c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5090c9b18e4SAmlan Barua } 5100c9b18e4SAmlan Barua if (fout) { 5110c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5130c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5140c9b18e4SAmlan Barua } 5150c9b18e4SAmlan Barua if (bout) { 5160c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5180c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5190c9b18e4SAmlan Barua } 5204f7415efSAmlan Barua #endif 5214f7415efSAmlan Barua break; 5224f7415efSAmlan Barua default: 5234f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5244f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5254f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5264f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5274f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5284f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5294f7415efSAmlan Barua 5304f7415efSAmlan Barua if (fin) { 5314f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 532778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5334f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5344f7415efSAmlan Barua } 5354f7415efSAmlan Barua if (bout) { 5364f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5389496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5394f7415efSAmlan Barua } 5403564f093SHong Zhang 54157625b84SAmlan Barua if (fout) { 54257625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 54457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 54557625b84SAmlan Barua } 5464f7415efSAmlan Barua #else 5470c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5480c9b18e4SAmlan Barua if (fin) { 5490c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5510c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5520c9b18e4SAmlan Barua } 5530c9b18e4SAmlan Barua if (fout) { 5540c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 555778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5560c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5570c9b18e4SAmlan Barua } 5580c9b18e4SAmlan Barua if (bout) { 5590c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5610c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5620c9b18e4SAmlan Barua } 5634f7415efSAmlan Barua #endif 5644f7415efSAmlan Barua break; 5654f7415efSAmlan Barua } 5664f7415efSAmlan Barua } 5674f7415efSAmlan Barua PetscFunctionReturn(0); 5684f7415efSAmlan Barua } 5694f7415efSAmlan Barua EXTERN_C_END 5704f7415efSAmlan Barua 571c92418ccSAmlan Barua #undef __FUNCT__ 57274a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 5733564f093SHong Zhang /*@ 5743564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 57554efbe56SHong Zhang 5763564f093SHong Zhang Collective on Mat 5773564f093SHong Zhang 5783564f093SHong Zhang Input Parameters: 5793564f093SHong Zhang + A - FFTW matrix 5803564f093SHong Zhang - x - the PETSc vector 5813564f093SHong Zhang 5823564f093SHong Zhang Output Parameters: 5833564f093SHong Zhang . y - the FFTW vector 5843564f093SHong Zhang 585b2b77a04SHong Zhang Options Database Keys: 5863564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 587b2b77a04SHong Zhang 588b2b77a04SHong Zhang Level: intermediate 5893564f093SHong Zhang 59097504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 59197504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 59297504071SAmlan Barua zeros. This routine does that job by scattering operation. 59397504071SAmlan Barua 5943564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 5953564f093SHong Zhang @*/ 5963564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 5973564f093SHong Zhang { 5983564f093SHong Zhang PetscErrorCode ierr; 599b2b77a04SHong Zhang 6003564f093SHong Zhang PetscFunctionBegin; 6013564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6023564f093SHong Zhang PetscFunctionReturn(0); 6033564f093SHong Zhang } 6043c3a9c75SAmlan Barua 6053c3a9c75SAmlan Barua EXTERN_C_BEGIN 6063c3a9c75SAmlan Barua #undef __FUNCT__ 6071986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 60874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6093c3a9c75SAmlan Barua { 6103c3a9c75SAmlan Barua PetscErrorCode ierr; 6113c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6123c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6133c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6149496c9aeSAmlan Barua PetscInt N=fft->N; 615b5d11533SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 6169496c9aeSAmlan Barua PetscInt low; 6178ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6183c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6199496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6203c3a9c75SAmlan Barua VecScatter vecscat; 6213c3a9c75SAmlan Barua IS list1,list2; 6229496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6239496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6249496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6259496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 6269496c9aeSAmlan Barua ptrdiff_t temp; 6279496c9aeSAmlan Barua #endif 6283c3a9c75SAmlan Barua 6293564f093SHong Zhang PetscFunctionBegin; 630f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6323c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 6333c3a9c75SAmlan Barua 6343564f093SHong Zhang if (size==1) { 6358ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6368ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6379496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6386971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6396971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6406971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6416971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 642b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6433564f093SHong Zhang } else { 6443c3a9c75SAmlan Barua switch (ndim) { 6453c3a9c75SAmlan Barua case 1: 64664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 64764657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 6484f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6494f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 65064657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 65164657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65264657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 65364657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 65464657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 65564657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 65664657d84SAmlan Barua #else 657e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 65864657d84SAmlan Barua #endif 6593c3a9c75SAmlan Barua break; 6603c3a9c75SAmlan Barua case 2: 661bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 662bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 6634f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 6644f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 665bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 666bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 669bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 670bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 671bd59e6c6SAmlan Barua #else 6723c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6733c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6749496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 6759496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 6763c3a9c75SAmlan Barua 6773564f093SHong Zhang if (dim[1]%2==0) { 6783c3a9c75SAmlan Barua NM = dim[1]+2; 6793564f093SHong Zhang } else { 6803c3a9c75SAmlan Barua NM = dim[1]+1; 6813564f093SHong Zhang } 6823c3a9c75SAmlan Barua for (i=0;i<local_n0;i++) { 6833c3a9c75SAmlan Barua for (j=0;j<dim[1];j++) { 6843c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 6853c3a9c75SAmlan Barua tempindx1 = i*NM + j; 6865b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 6873c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 6883c3a9c75SAmlan Barua } 6893c3a9c75SAmlan Barua } 6903c3a9c75SAmlan Barua 6913c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 6923c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 6933c3a9c75SAmlan Barua 694f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 695f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 698b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 699b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 700b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 701b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 702bd59e6c6SAmlan Barua #endif 7039496c9aeSAmlan Barua break; 7043c3a9c75SAmlan Barua 7053c3a9c75SAmlan Barua case 3: 706bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 707bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 7084f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7094f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 710bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 711bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 713bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 714bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 715bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 716bd59e6c6SAmlan Barua #else 71751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 71851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 71951d1eed7SAmlan Barua 7209496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7219496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 72251d1eed7SAmlan Barua 723db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 724db4deed7SKarl Rupp else NM = dim[2]+1; 72551d1eed7SAmlan Barua 72651d1eed7SAmlan Barua for (i=0;i<local_n0;i++) { 72751d1eed7SAmlan Barua for (j=0;j<dim[1];j++) { 72851d1eed7SAmlan Barua for (k=0;k<dim[2];k++) { 72951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 73051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 73151d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 73251d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 73351d1eed7SAmlan Barua } 73451d1eed7SAmlan Barua } 73551d1eed7SAmlan Barua } 73651d1eed7SAmlan Barua 73751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 73851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 73951d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 74051d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74151d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74251d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 743b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 744b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 745b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 746b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 747bd59e6c6SAmlan Barua #endif 7489496c9aeSAmlan Barua break; 7493c3a9c75SAmlan Barua 7503c3a9c75SAmlan Barua default: 751bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 752bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 7534f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 7544f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 755bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 756bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 758bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 759bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 760bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 761bd59e6c6SAmlan Barua #else 762e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 763e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 764e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 765e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 766e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 767e5d7f247SAmlan Barua 768e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 769e5d7f247SAmlan Barua 770e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 771e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 772e5d7f247SAmlan Barua 773db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 774db4deed7SKarl Rupp else NM = 1; 775e5d7f247SAmlan Barua 7766971673cSAmlan Barua j = low; 7773564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 7786971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 7796971673cSAmlan Barua indx2[i] = j; 7803564f093SHong Zhang if (k%dim[ndim-1]==0) { j+=NM;} 7816971673cSAmlan Barua j++; 7826971673cSAmlan Barua } 7836971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7846971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7856971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 7866971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7876971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7886971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 789b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 790b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 791b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 792b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 793bd59e6c6SAmlan Barua #endif 7949496c9aeSAmlan Barua break; 7953c3a9c75SAmlan Barua } 796e81bb599SAmlan Barua } 7973564f093SHong Zhang PetscFunctionReturn(0); 7983c3a9c75SAmlan Barua } 7993c3a9c75SAmlan Barua EXTERN_C_END 8003c3a9c75SAmlan Barua 8013c3a9c75SAmlan Barua #undef __FUNCT__ 80274a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8033564f093SHong Zhang /*@ 8043564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8053564f093SHong Zhang 8063564f093SHong Zhang Collective on Mat 8073564f093SHong Zhang 8083564f093SHong Zhang Input Parameters: 8093564f093SHong Zhang + A - FFTW matrix 8103564f093SHong Zhang - x - FFTW vector 8113564f093SHong Zhang 8123564f093SHong Zhang Output Parameters: 8133564f093SHong Zhang . y - PETSc vector 8143564f093SHong Zhang 8153564f093SHong Zhang Level: intermediate 8163564f093SHong Zhang 8173564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8183564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8193564f093SHong Zhang 8203564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8213564f093SHong Zhang @*/ 82274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8233c3a9c75SAmlan Barua { 8243c3a9c75SAmlan Barua PetscErrorCode ierr; 825*5fd66863SKarl Rupp 8263c3a9c75SAmlan Barua PetscFunctionBegin; 82774a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8283c3a9c75SAmlan Barua PetscFunctionReturn(0); 8293c3a9c75SAmlan Barua } 83054efbe56SHong Zhang 8313c3a9c75SAmlan Barua EXTERN_C_BEGIN 8323c3a9c75SAmlan Barua #undef __FUNCT__ 83374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 83474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8355b3e41ffSAmlan Barua { 8365b3e41ffSAmlan Barua PetscErrorCode ierr; 8375b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8385b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8395b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8409496c9aeSAmlan Barua PetscInt N=fft->N; 841b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 8429496c9aeSAmlan Barua PetscInt low; 8439496c9aeSAmlan Barua PetscInt rank,size; 8445b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8459496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8465b3e41ffSAmlan Barua VecScatter vecscat; 8475b3e41ffSAmlan Barua IS list1,list2; 8489496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8499496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8509496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8519496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8529496c9aeSAmlan Barua ptrdiff_t temp; 8539496c9aeSAmlan Barua #endif 8549496c9aeSAmlan Barua 8553564f093SHong Zhang PetscFunctionBegin; 8565b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8575b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 858b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 8595b3e41ffSAmlan Barua 860e81bb599SAmlan Barua if (size==1) { 861b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 8626971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8636971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8646971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8656971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 866b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 867e81bb599SAmlan Barua 8683564f093SHong Zhang } else { 869e81bb599SAmlan Barua switch (ndim) { 870e81bb599SAmlan Barua case 1: 87164657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87264657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 8734f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 8744f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 87564657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 87664657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87764657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87864657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 87964657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 88064657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 88164657d84SAmlan Barua #else 8826a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 88364657d84SAmlan Barua #endif 8845b3e41ffSAmlan Barua break; 8855b3e41ffSAmlan Barua case 2: 886bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 887bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 8884f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 8894f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 890bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 891bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 894bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 895bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 896bd59e6c6SAmlan Barua #else 8975b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 8985b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 8995b3e41ffSAmlan Barua 9009496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9019496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9025b3e41ffSAmlan Barua 903db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 904db4deed7SKarl Rupp else NM = dim[1]+1; 9055b3e41ffSAmlan Barua 9065b3e41ffSAmlan Barua for (i=0;i<local_n0;i++) { 9075b3e41ffSAmlan Barua for (j=0;j<dim[1];j++) { 9085b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9095b3e41ffSAmlan Barua tempindx1 = i*NM + j; 9105b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9115b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9125b3e41ffSAmlan Barua } 9135b3e41ffSAmlan Barua } 9145b3e41ffSAmlan Barua 9155b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9165b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9175b3e41ffSAmlan Barua 9185b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9195b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9205b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9215b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 922b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 923b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 924b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 925b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 926bd59e6c6SAmlan Barua #endif 9279496c9aeSAmlan Barua break; 9285b3e41ffSAmlan Barua case 3: 929bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 930bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 9314f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9324f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 933bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 934bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 937bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 938bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 939bd59e6c6SAmlan Barua #else 940bd59e6c6SAmlan Barua 94151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 94251d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 94351d1eed7SAmlan Barua 9449496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9459496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 94651d1eed7SAmlan Barua 947db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 948db4deed7SKarl Rupp else NM = dim[2]+1; 94951d1eed7SAmlan Barua 95051d1eed7SAmlan Barua for (i=0;i<local_n0;i++) { 95151d1eed7SAmlan Barua for (j=0;j<dim[1];j++) { 95251d1eed7SAmlan Barua for (k=0;k<dim[2];k++) { 95351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 95451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 95551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 95651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 95751d1eed7SAmlan Barua } 95851d1eed7SAmlan Barua } 95951d1eed7SAmlan Barua } 96051d1eed7SAmlan Barua 96151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 96251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 96351d1eed7SAmlan Barua 96451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 96551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 968b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 969b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 970b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 971b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 972bd59e6c6SAmlan Barua #endif 9739496c9aeSAmlan Barua break; 9745b3e41ffSAmlan Barua default: 975bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 976bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 9774f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 9784f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 979bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 980bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 984bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 985bd59e6c6SAmlan Barua #else 986ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 987ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 988ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 989ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 990ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 991ba6e06d0SAmlan Barua 992ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 993ba6e06d0SAmlan Barua 994ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 995ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 996ba6e06d0SAmlan Barua 997db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 998db4deed7SKarl Rupp else NM = 1; 999ba6e06d0SAmlan Barua 1000ba6e06d0SAmlan Barua j = low; 10013564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 1002ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1003ba6e06d0SAmlan Barua indx2[i] = j; 10043564f093SHong Zhang if (k%dim[ndim-1]==0)j+=NM; 1005ba6e06d0SAmlan Barua j++; 1006ba6e06d0SAmlan Barua } 1007ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1008ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1009ba6e06d0SAmlan Barua 1010ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1011ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1014b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1015b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1016b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1017b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1018bd59e6c6SAmlan Barua #endif 10199496c9aeSAmlan Barua break; 10205b3e41ffSAmlan Barua } 1021e81bb599SAmlan Barua } 10223564f093SHong Zhang PetscFunctionReturn(0); 10235b3e41ffSAmlan Barua } 10245b3e41ffSAmlan Barua EXTERN_C_END 10255b3e41ffSAmlan Barua 10265b3e41ffSAmlan Barua EXTERN_C_BEGIN 10275b3e41ffSAmlan Barua #undef __FUNCT__ 10283c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10293c3a9c75SAmlan Barua /* 10303564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10313564f093SHong Zhang 10323c3a9c75SAmlan Barua Options Database Keys: 10333c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10343c3a9c75SAmlan Barua 10353c3a9c75SAmlan Barua Level: intermediate 10363c3a9c75SAmlan Barua 10373c3a9c75SAmlan Barua */ 1038b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1039b2b77a04SHong Zhang { 1040b2b77a04SHong Zhang PetscErrorCode ierr; 1041b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1042b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1043b2b77a04SHong Zhang Mat_FFTW *fftw; 1044b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1045b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1046b2b77a04SHong Zhang PetscBool flg; 10474f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 104811902ff2SHong Zhang PetscMPIInt size,rank; 10499496c9aeSAmlan Barua ptrdiff_t *pdim; 10509496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10519496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10529496c9aeSAmlan Barua ptrdiff_t temp; 10538ca4c034SAmlan Barua PetscInt N1,tot_dim; 10544f48bc67SAmlan Barua #else 10554f48bc67SAmlan Barua PetscInt n1; 10569496c9aeSAmlan Barua #endif 10579496c9aeSAmlan Barua 1058b2b77a04SHong Zhang PetscFunctionBegin; 1059b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 106011902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1061c92418ccSAmlan Barua 10621878d478SAmlan Barua fftw_mpi_init(); 106311902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 106411902ff2SHong Zhang pdim[0] = dim[0]; 10658ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10668ca4c034SAmlan Barua tot_dim = 2*dim[0]; 10678ca4c034SAmlan Barua #endif 10683564f093SHong Zhang for (ctr=1;ctr<ndim;ctr++) { 10696882372aSHong Zhang partial_dim *= dim[ctr]; 107011902ff2SHong Zhang pdim[ctr] = dim[ctr]; 10718ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1072db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1073db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 10748ca4c034SAmlan Barua #endif 10756882372aSHong Zhang } 10763c3a9c75SAmlan Barua 1077b2b77a04SHong Zhang if (size == 1) { 1078e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1079b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1080b2b77a04SHong Zhang n = N; 1081e81bb599SAmlan Barua #else 1082e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1083e81bb599SAmlan Barua n = tot_dim; 1084e81bb599SAmlan Barua #endif 1085e81bb599SAmlan Barua 1086b2b77a04SHong Zhang } else { 10871abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1088b2b77a04SHong Zhang switch (ndim) { 1089b2b77a04SHong Zhang case 1: 10903c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10913c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1092e5d7f247SAmlan Barua #else 10939496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 10946882372aSHong Zhang n = (PetscInt)local_n0; 10959496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 10969496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1097e5d7f247SAmlan Barua #endif 1098b2b77a04SHong Zhang break; 1099b2b77a04SHong Zhang case 2: 11005b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1101b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1102b2b77a04SHong Zhang /* 1103b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1104b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1105b2b77a04SHong Zhang */ 1106b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1107b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11085b3e41ffSAmlan Barua #else 11094f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 11105b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1111c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11125b3e41ffSAmlan Barua #endif 1113b2b77a04SHong Zhang break; 1114b2b77a04SHong Zhang case 3: 111551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 111651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 11176882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11186882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 111951d1eed7SAmlan Barua #else 11204f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 112151d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1122c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 112351d1eed7SAmlan Barua #endif 1124b2b77a04SHong Zhang break; 1125b2b77a04SHong Zhang default: 1126b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 112711902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 11286882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11296882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1130b3a17365SAmlan Barua #else 1131b3a17365SAmlan Barua temp = pdim[ndim-1]; 1132b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 11334f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1134b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1135b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1136b3a17365SAmlan Barua pdim[ndim-1] = temp; 1137c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1138b3a17365SAmlan Barua #endif 1139b2b77a04SHong Zhang break; 1140b2b77a04SHong Zhang } 1141b2b77a04SHong Zhang } 1142b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1143b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1144b2b77a04SHong Zhang fft->data = (void*)fftw; 1145b2b77a04SHong Zhang 1146b2b77a04SHong Zhang fft->n = n; 11470c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1148e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1149c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1150b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1151c92418ccSAmlan Barua 1152b2b77a04SHong Zhang fftw->p_forward = 0; 1153b2b77a04SHong Zhang fftw->p_backward = 0; 1154b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1155b2b77a04SHong Zhang 1156b2b77a04SHong Zhang if (size == 1) { 1157b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1158b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1159b2b77a04SHong Zhang } else { 1160b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1161b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1162b2b77a04SHong Zhang } 1163b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1164b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 11654a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 11664a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 11674a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 11684a52bad8SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1169b2b77a04SHong Zhang 1170b2b77a04SHong Zhang /* get runtime options */ 1171b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1172b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1173b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 11744a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1175b2b77a04SHong Zhang PetscFunctionReturn(0); 1176b2b77a04SHong Zhang } 1177b2b77a04SHong Zhang EXTERN_C_END 11783c3a9c75SAmlan Barua 11793c3a9c75SAmlan Barua 11803c3a9c75SAmlan Barua 11813c3a9c75SAmlan Barua 1182