1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua Input parameter: 303564f093SHong Zhang A - the matrix 3197504071SAmlan Barua x - the vector on which FDFT will be performed 3297504071SAmlan Barua 3397504071SAmlan Barua Output parameter: 3497504071SAmlan Barua y - vector that stores result of FDFT 3597504071SAmlan Barua */ 36b2b77a04SHong Zhang #undef __FUNCT__ 37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39b2b77a04SHong Zhang { 40b2b77a04SHong Zhang PetscErrorCode ierr; 41b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 42b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 44b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 45b2b77a04SHong Zhang 46b2b77a04SHong Zhang PetscFunctionBegin; 47b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 50b2b77a04SHong Zhang switch (ndim) { 51b2b77a04SHong Zhang case 1: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 2: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang case 3: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang default: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75*5274ed1bSHong Zhang /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */ 7658a912c5SAmlan Barua #else 7758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7858a912c5SAmlan Barua #endif 79b2b77a04SHong Zhang break; 80b2b77a04SHong Zhang } 81b2b77a04SHong Zhang fftw->finarray = x_array; 82b2b77a04SHong Zhang fftw->foutarray = y_array; 83b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 84b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 85b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 86b2b77a04SHong Zhang } else { /* use existing plan */ 87b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 88b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 89b2b77a04SHong Zhang } else { 90b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 91b2b77a04SHong Zhang } 92b2b77a04SHong Zhang } 93b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 95b2b77a04SHong Zhang PetscFunctionReturn(0); 96b2b77a04SHong Zhang } 97b2b77a04SHong Zhang 9897504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 9997504071SAmlan Barua Input parameter: 1003564f093SHong Zhang A - the matrix 10197504071SAmlan Barua x - the vector on which BDFT will be performed 10297504071SAmlan Barua 10397504071SAmlan Barua Output parameter: 10497504071SAmlan Barua y - vector that stores result of BDFT 10597504071SAmlan Barua */ 10697504071SAmlan Barua 107b2b77a04SHong Zhang #undef __FUNCT__ 108b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 109b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 110b2b77a04SHong Zhang { 111b2b77a04SHong Zhang PetscErrorCode ierr; 112b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 113b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 114b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 115b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 116b2b77a04SHong Zhang 117b2b77a04SHong Zhang PetscFunctionBegin; 118b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 121b2b77a04SHong Zhang switch (ndim) { 122b2b77a04SHong Zhang case 1: 12358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 124b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12558a912c5SAmlan Barua #else 126e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 12758a912c5SAmlan Barua #endif 128b2b77a04SHong Zhang break; 129b2b77a04SHong Zhang case 2: 13058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 131b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13258a912c5SAmlan Barua #else 133e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 13458a912c5SAmlan Barua #endif 135b2b77a04SHong Zhang break; 136b2b77a04SHong Zhang case 3: 13758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 138b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13958a912c5SAmlan Barua #else 140e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14158a912c5SAmlan Barua #endif 142b2b77a04SHong Zhang break; 143b2b77a04SHong Zhang default: 14458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 145b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14658a912c5SAmlan Barua #else 147e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14858a912c5SAmlan Barua #endif 149b2b77a04SHong Zhang break; 150b2b77a04SHong Zhang } 151b2b77a04SHong Zhang fftw->binarray = x_array; 152b2b77a04SHong Zhang fftw->boutarray = y_array; 153b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 154b2b77a04SHong Zhang } else { /* use existing plan */ 155b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 156b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 157b2b77a04SHong Zhang } else { 158b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 159b2b77a04SHong Zhang } 160b2b77a04SHong Zhang } 161b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang PetscFunctionReturn(0); 164b2b77a04SHong Zhang } 165b2b77a04SHong Zhang 16697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 16797504071SAmlan Barua Input parameter: 1683564f093SHong Zhang A - the matrix 16997504071SAmlan Barua x - the vector on which FDFT will be performed 17097504071SAmlan Barua 17197504071SAmlan Barua Output parameter: 17297504071SAmlan Barua y - vector that stores result of FDFT 17397504071SAmlan Barua */ 174b2b77a04SHong Zhang #undef __FUNCT__ 175b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 176b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 177b2b77a04SHong Zhang { 178b2b77a04SHong Zhang PetscErrorCode ierr; 179b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 180b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 181b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 182c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 183ce94432eSBarry Smith MPI_Comm comm; 184b2b77a04SHong Zhang 185b2b77a04SHong Zhang PetscFunctionBegin; 186ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 187b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 188b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 189b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 190b2b77a04SHong Zhang switch (ndim) { 191b2b77a04SHong Zhang case 1: 1923c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 193b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 194ae0a50aaSHong Zhang #else 1954f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 1963c3a9c75SAmlan Barua #endif 197b2b77a04SHong Zhang break; 198b2b77a04SHong Zhang case 2: 19997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 200b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2013c3a9c75SAmlan Barua #else 2023c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2033c3a9c75SAmlan Barua #endif 204b2b77a04SHong Zhang break; 205b2b77a04SHong Zhang case 3: 2063c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 207b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2083c3a9c75SAmlan Barua #else 20951d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2103c3a9c75SAmlan Barua #endif 211b2b77a04SHong Zhang break; 212b2b77a04SHong Zhang default: 2133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 214c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2153c3a9c75SAmlan Barua #else 2163c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2173c3a9c75SAmlan Barua #endif 218b2b77a04SHong Zhang break; 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang fftw->finarray = x_array; 221b2b77a04SHong Zhang fftw->foutarray = y_array; 222b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 223b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 224b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 225b2b77a04SHong Zhang } else { /* use existing plan */ 226b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 227b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 228b2b77a04SHong Zhang } else { 229b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang } 232b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 233b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 234b2b77a04SHong Zhang PetscFunctionReturn(0); 235b2b77a04SHong Zhang } 236b2b77a04SHong Zhang 23797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 23897504071SAmlan Barua Input parameter: 2393564f093SHong Zhang A - the matrix 24097504071SAmlan Barua x - the vector on which BDFT will be performed 24197504071SAmlan Barua 24297504071SAmlan Barua Output parameter: 24397504071SAmlan Barua y - vector that stores result of BDFT 24497504071SAmlan Barua */ 245b2b77a04SHong Zhang #undef __FUNCT__ 246b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 247b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 248b2b77a04SHong Zhang { 249b2b77a04SHong Zhang PetscErrorCode ierr; 250b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 251b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 253c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 254ce94432eSBarry Smith MPI_Comm comm; 255b2b77a04SHong Zhang 256b2b77a04SHong Zhang PetscFunctionBegin; 257ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 258b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 259b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 260b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 261b2b77a04SHong Zhang switch (ndim) { 262b2b77a04SHong Zhang case 1: 2633c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 264b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 265ae0a50aaSHong Zhang #else 2664f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2673c3a9c75SAmlan Barua #endif 268b2b77a04SHong Zhang break; 269b2b77a04SHong Zhang case 2: 27097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 271b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2723c3a9c75SAmlan Barua #else 2733c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2743c3a9c75SAmlan Barua #endif 275b2b77a04SHong Zhang break; 276b2b77a04SHong Zhang case 3: 2773c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 278b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2793c3a9c75SAmlan Barua #else 2803c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2813c3a9c75SAmlan Barua #endif 282b2b77a04SHong Zhang break; 283b2b77a04SHong Zhang default: 2843c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 285c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2863c3a9c75SAmlan Barua #else 2873c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2883c3a9c75SAmlan Barua #endif 289b2b77a04SHong Zhang break; 290b2b77a04SHong Zhang } 291b2b77a04SHong Zhang fftw->binarray = x_array; 292b2b77a04SHong Zhang fftw->boutarray = y_array; 293b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 294b2b77a04SHong Zhang } else { /* use existing plan */ 295b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 296b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 297b2b77a04SHong Zhang } else { 298b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 299b2b77a04SHong Zhang } 300b2b77a04SHong Zhang } 301b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 302b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 303b2b77a04SHong Zhang PetscFunctionReturn(0); 304b2b77a04SHong Zhang } 305b2b77a04SHong Zhang 306b2b77a04SHong Zhang #undef __FUNCT__ 307b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 308b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 309b2b77a04SHong Zhang { 310b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 311b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 312b2b77a04SHong Zhang PetscErrorCode ierr; 313b2b77a04SHong Zhang 314b2b77a04SHong Zhang PetscFunctionBegin; 315b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 316b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 317b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 318bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3196ccf0b3eSHong Zhang fftw_mpi_cleanup(); 320b2b77a04SHong Zhang PetscFunctionReturn(0); 321b2b77a04SHong Zhang } 322b2b77a04SHong Zhang 323c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 324b2b77a04SHong Zhang #undef __FUNCT__ 325b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 326b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 327b2b77a04SHong Zhang { 328b2b77a04SHong Zhang PetscErrorCode ierr; 329b2b77a04SHong Zhang PetscScalar *array; 330b2b77a04SHong Zhang 331b2b77a04SHong Zhang PetscFunctionBegin; 332b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 333b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 334b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 335b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 336b2b77a04SHong Zhang PetscFunctionReturn(0); 337b2b77a04SHong Zhang } 338b2b77a04SHong Zhang 3394f7415efSAmlan Barua #undef __FUNCT__ 3404be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3414be45526SHong Zhang /*@ 342b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3434f7415efSAmlan Barua parallel layout determined by FFTW 3444f7415efSAmlan Barua 3454f7415efSAmlan Barua Collective on Mat 3464f7415efSAmlan Barua 3474f7415efSAmlan Barua Input Parameter: 3483564f093SHong Zhang . A - the matrix 3494f7415efSAmlan Barua 3504f7415efSAmlan Barua Output Parameter: 351cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 352cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 353cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3544f7415efSAmlan Barua 3554f7415efSAmlan Barua Level: advanced 3563564f093SHong Zhang 35797504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 35897504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 35997504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 36097504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 36197504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 36297504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 363e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 364e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 365e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 366e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 367e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 368e0ec536eSAmlan Barua each processor and returns that. 3694f7415efSAmlan Barua 3704f7415efSAmlan Barua .seealso: MatCreateFFTW() 3714be45526SHong Zhang @*/ 3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3734be45526SHong Zhang { 3744be45526SHong Zhang PetscErrorCode ierr; 375b4c3921fSHong Zhang 3764be45526SHong Zhang PetscFunctionBegin; 3774be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3784be45526SHong Zhang PetscFunctionReturn(0); 3794be45526SHong Zhang } 3804be45526SHong Zhang 3814be45526SHong Zhang #undef __FUNCT__ 3824be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3834be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3844f7415efSAmlan Barua { 3854f7415efSAmlan Barua PetscErrorCode ierr; 3864f7415efSAmlan Barua PetscMPIInt size,rank; 387ce94432eSBarry Smith MPI_Comm comm; 3884f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3894f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3909496c9aeSAmlan Barua PetscInt N = fft->N; 3914f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 3924f7415efSAmlan Barua 3934f7415efSAmlan Barua PetscFunctionBegin; 3944f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3954f7415efSAmlan Barua PetscValidType(A,1); 396ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 3974f7415efSAmlan Barua 3984f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 3994f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4004f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4024f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4034f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4044f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4054f7415efSAmlan Barua #else 4068ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4078ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4088ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4094f7415efSAmlan Barua #endif 41097504071SAmlan Barua } else { /* parallel cases */ 4114f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4129496c9aeSAmlan Barua ptrdiff_t local_n1; 4139496c9aeSAmlan Barua fftw_complex *data_fout; 4149496c9aeSAmlan Barua ptrdiff_t local_1_start; 4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4169496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4179496c9aeSAmlan Barua #else 4184f7415efSAmlan Barua double *data_finr,*data_boutr; 4196ccf0b3eSHong Zhang PetscInt n1,N1; 4209496c9aeSAmlan Barua ptrdiff_t temp; 4219496c9aeSAmlan Barua #endif 4229496c9aeSAmlan Barua 4234f7415efSAmlan Barua switch (ndim) { 4244f7415efSAmlan Barua case 1: 42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4264f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42757625b84SAmlan Barua #else 42857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42957625b84SAmlan Barua if (fin) { 43057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43226fbe8dcSKarl Rupp 43357625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43457625b84SAmlan Barua } 43557625b84SAmlan Barua if (fout) { 43657625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 437778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43826fbe8dcSKarl Rupp 43957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 44057625b84SAmlan Barua } 44157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44257625b84SAmlan Barua if (bout) { 44357625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 444778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44526fbe8dcSKarl Rupp 44657625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44757625b84SAmlan Barua } 44857625b84SAmlan Barua break; 44957625b84SAmlan Barua #endif 4504f7415efSAmlan Barua case 2: 45197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4524f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4534f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4544f7415efSAmlan Barua if (fin) { 4554f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 456778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 45726fbe8dcSKarl Rupp 4584f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4594f7415efSAmlan Barua } 4604f7415efSAmlan Barua if (bout) { 4614f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 462778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 46326fbe8dcSKarl Rupp 4644f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4654f7415efSAmlan Barua } 46657625b84SAmlan Barua if (fout) { 46757625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46926fbe8dcSKarl Rupp 47057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 47157625b84SAmlan Barua } 4724f7415efSAmlan Barua #else 4734f7415efSAmlan Barua /* Get local size */ 4744f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4754f7415efSAmlan Barua if (fin) { 4764f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 477778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 47826fbe8dcSKarl Rupp 4794f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua if (fout) { 4824f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48426fbe8dcSKarl Rupp 4854f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4864f7415efSAmlan Barua } 4874f7415efSAmlan Barua if (bout) { 4884f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 49026fbe8dcSKarl Rupp 4914f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4924f7415efSAmlan Barua } 4934f7415efSAmlan Barua #endif 4944f7415efSAmlan Barua break; 4954f7415efSAmlan Barua case 3: 4964f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4974f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4984f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4994f7415efSAmlan Barua if (fin) { 5004f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 501778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50226fbe8dcSKarl Rupp 5034f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5044f7415efSAmlan Barua } 5054f7415efSAmlan Barua if (bout) { 5064f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 507778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 50826fbe8dcSKarl Rupp 5094f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5104f7415efSAmlan Barua } 5113564f093SHong Zhang 51257625b84SAmlan Barua if (fout) { 51357625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 514778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51526fbe8dcSKarl Rupp 51657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51757625b84SAmlan Barua } 5184f7415efSAmlan Barua #else 5190c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5200c9b18e4SAmlan Barua if (fin) { 5210c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52326fbe8dcSKarl Rupp 5240c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5250c9b18e4SAmlan Barua } 5260c9b18e4SAmlan Barua if (fout) { 5270c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52926fbe8dcSKarl Rupp 5300c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5310c9b18e4SAmlan Barua } 5320c9b18e4SAmlan Barua if (bout) { 5330c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53526fbe8dcSKarl Rupp 5360c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5370c9b18e4SAmlan Barua } 5384f7415efSAmlan Barua #endif 5394f7415efSAmlan Barua break; 5404f7415efSAmlan Barua default: 5414f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5424f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 54326fbe8dcSKarl Rupp 5444f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 54526fbe8dcSKarl Rupp 5464f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5474f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 54826fbe8dcSKarl Rupp 5494f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5504f7415efSAmlan Barua 5514f7415efSAmlan Barua if (fin) { 5524f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 553778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 55426fbe8dcSKarl Rupp 5554f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5564f7415efSAmlan Barua } 5574f7415efSAmlan Barua if (bout) { 5584f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 559778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 56026fbe8dcSKarl Rupp 5619496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5624f7415efSAmlan Barua } 5633564f093SHong Zhang 56457625b84SAmlan Barua if (fout) { 56557625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56726fbe8dcSKarl Rupp 56857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56957625b84SAmlan Barua } 5704f7415efSAmlan Barua #else 5710c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5720c9b18e4SAmlan Barua if (fin) { 5730c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 57526fbe8dcSKarl Rupp 5760c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5770c9b18e4SAmlan Barua } 5780c9b18e4SAmlan Barua if (fout) { 5790c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58126fbe8dcSKarl Rupp 5820c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5830c9b18e4SAmlan Barua } 5840c9b18e4SAmlan Barua if (bout) { 5850c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 58726fbe8dcSKarl Rupp 5880c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5890c9b18e4SAmlan Barua } 5904f7415efSAmlan Barua #endif 5914f7415efSAmlan Barua break; 5924f7415efSAmlan Barua } 5934f7415efSAmlan Barua } 5944f7415efSAmlan Barua PetscFunctionReturn(0); 5954f7415efSAmlan Barua } 5964f7415efSAmlan Barua 597c92418ccSAmlan Barua #undef __FUNCT__ 59874a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 5993564f093SHong Zhang /*@ 6003564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 60154efbe56SHong Zhang 6023564f093SHong Zhang Collective on Mat 6033564f093SHong Zhang 6043564f093SHong Zhang Input Parameters: 6053564f093SHong Zhang + A - FFTW matrix 6063564f093SHong Zhang - x - the PETSc vector 6073564f093SHong Zhang 6083564f093SHong Zhang Output Parameters: 6093564f093SHong Zhang . y - the FFTW vector 6103564f093SHong Zhang 611b2b77a04SHong Zhang Options Database Keys: 6123564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 613b2b77a04SHong Zhang 614b2b77a04SHong Zhang Level: intermediate 6153564f093SHong Zhang 61697504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 61797504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 61897504071SAmlan Barua zeros. This routine does that job by scattering operation. 61997504071SAmlan Barua 6203564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6213564f093SHong Zhang @*/ 6223564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6233564f093SHong Zhang { 6243564f093SHong Zhang PetscErrorCode ierr; 625b2b77a04SHong Zhang 6263564f093SHong Zhang PetscFunctionBegin; 6273564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6283564f093SHong Zhang PetscFunctionReturn(0); 6293564f093SHong Zhang } 6303c3a9c75SAmlan Barua 6313c3a9c75SAmlan Barua #undef __FUNCT__ 6321986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 63374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6343c3a9c75SAmlan Barua { 6353c3a9c75SAmlan Barua PetscErrorCode ierr; 636ce94432eSBarry Smith MPI_Comm comm; 6373c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6383c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6399496c9aeSAmlan Barua PetscInt N =fft->N; 640b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6419496c9aeSAmlan Barua PetscInt low; 6428ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 6433c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 6449496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6453c3a9c75SAmlan Barua VecScatter vecscat; 6463c3a9c75SAmlan Barua IS list1,list2; 6479496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6489496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6499496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6509496c9aeSAmlan Barua PetscInt N1, n1,NM; 6519496c9aeSAmlan Barua ptrdiff_t temp; 6529496c9aeSAmlan Barua #endif 6533c3a9c75SAmlan Barua 6543564f093SHong Zhang PetscFunctionBegin; 655ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 656f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 657f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6580298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 6593c3a9c75SAmlan Barua 6603564f093SHong Zhang if (size==1) { 6618ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6628ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6639496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6646971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6656971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6666971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6676971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 668b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6693564f093SHong Zhang } else { 6703c3a9c75SAmlan Barua switch (ndim) { 6713c3a9c75SAmlan Barua case 1: 67264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67364657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 67426fbe8dcSKarl Rupp 6754f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6764f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 67764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 67864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 67964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 68064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 68164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 68264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 68364657d84SAmlan Barua #else 684e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 68564657d84SAmlan Barua #endif 6863c3a9c75SAmlan Barua break; 6873c3a9c75SAmlan Barua case 2: 688bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 689bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 69026fbe8dcSKarl Rupp 6914f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 6924f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 693bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 694bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 697bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 698bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 699bd59e6c6SAmlan Barua #else 7003c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 70126fbe8dcSKarl Rupp 7023c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7039496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7049496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7053c3a9c75SAmlan Barua 7063564f093SHong Zhang if (dim[1]%2==0) { 7073c3a9c75SAmlan Barua NM = dim[1]+2; 7083564f093SHong Zhang } else { 7093c3a9c75SAmlan Barua NM = dim[1]+1; 7103564f093SHong Zhang } 7113c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7123c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7133c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7143c3a9c75SAmlan Barua tempindx1 = i*NM + j; 71526fbe8dcSKarl Rupp 7165b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7173c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7183c3a9c75SAmlan Barua } 7193c3a9c75SAmlan Barua } 7203c3a9c75SAmlan Barua 7213c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7223c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7233c3a9c75SAmlan Barua 724f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 725f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 726f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 728b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 729b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 730b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 731b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 732bd59e6c6SAmlan Barua #endif 7339496c9aeSAmlan Barua break; 7343c3a9c75SAmlan Barua 7353c3a9c75SAmlan Barua case 3: 736bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 737bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 73826fbe8dcSKarl Rupp 7394f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7404f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 741bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 742bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 745bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 746bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 747bd59e6c6SAmlan Barua #else 74851d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 74926fbe8dcSKarl Rupp 75051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 75151d1eed7SAmlan Barua 7529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7539496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 75451d1eed7SAmlan Barua 755db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 756db4deed7SKarl Rupp else NM = dim[2]+1; 75751d1eed7SAmlan Barua 75851d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 75951d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 76051d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 76151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 76251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 76326fbe8dcSKarl Rupp 76451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 76551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 76651d1eed7SAmlan Barua } 76751d1eed7SAmlan Barua } 76851d1eed7SAmlan Barua } 76951d1eed7SAmlan Barua 77051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 77151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 77251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 77351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 77551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 776b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 777b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 778b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 779b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 780bd59e6c6SAmlan Barua #endif 7819496c9aeSAmlan Barua break; 7823c3a9c75SAmlan Barua 7833c3a9c75SAmlan Barua default: 784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 785bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78626fbe8dcSKarl Rupp 7874f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 7884f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 789bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 790bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 795bd59e6c6SAmlan Barua #else 796e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 79726fbe8dcSKarl Rupp 798e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 79926fbe8dcSKarl Rupp 800e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80126fbe8dcSKarl Rupp 802e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 80326fbe8dcSKarl Rupp 804e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 805e5d7f247SAmlan Barua 806e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 807e5d7f247SAmlan Barua 808e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 809e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 810e5d7f247SAmlan Barua 811db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 812db4deed7SKarl Rupp else NM = 1; 813e5d7f247SAmlan Barua 8146971673cSAmlan Barua j = low; 8153564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8166971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8176971673cSAmlan Barua indx2[i] = j; 81826fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8196971673cSAmlan Barua j++; 8206971673cSAmlan Barua } 8216971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8226971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8236971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8246971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8256971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8266971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 827b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 828b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 829b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 830b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 831bd59e6c6SAmlan Barua #endif 8329496c9aeSAmlan Barua break; 8333c3a9c75SAmlan Barua } 834e81bb599SAmlan Barua } 8353564f093SHong Zhang PetscFunctionReturn(0); 8363c3a9c75SAmlan Barua } 8373c3a9c75SAmlan Barua 8383c3a9c75SAmlan Barua #undef __FUNCT__ 83974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8403564f093SHong Zhang /*@ 8413564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8423564f093SHong Zhang 8433564f093SHong Zhang Collective on Mat 8443564f093SHong Zhang 8453564f093SHong Zhang Input Parameters: 8463564f093SHong Zhang + A - FFTW matrix 8473564f093SHong Zhang - x - FFTW vector 8483564f093SHong Zhang 8493564f093SHong Zhang Output Parameters: 8503564f093SHong Zhang . y - PETSc vector 8513564f093SHong Zhang 8523564f093SHong Zhang Level: intermediate 8533564f093SHong Zhang 8543564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8553564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8563564f093SHong Zhang 8573564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8583564f093SHong Zhang @*/ 85974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8603c3a9c75SAmlan Barua { 8613c3a9c75SAmlan Barua PetscErrorCode ierr; 8625fd66863SKarl Rupp 8633c3a9c75SAmlan Barua PetscFunctionBegin; 86474a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8653c3a9c75SAmlan Barua PetscFunctionReturn(0); 8663c3a9c75SAmlan Barua } 86754efbe56SHong Zhang 8683c3a9c75SAmlan Barua #undef __FUNCT__ 86974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 87074a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8715b3e41ffSAmlan Barua { 8725b3e41ffSAmlan Barua PetscErrorCode ierr; 873ce94432eSBarry Smith MPI_Comm comm; 8745b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8755b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8769496c9aeSAmlan Barua PetscInt N = fft->N; 877b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 8789496c9aeSAmlan Barua PetscInt low; 8799496c9aeSAmlan Barua PetscInt rank,size; 8805b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8819496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8825b3e41ffSAmlan Barua VecScatter vecscat; 8835b3e41ffSAmlan Barua IS list1,list2; 8849496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8859496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8869496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8879496c9aeSAmlan Barua PetscInt N1, n1,NM; 8889496c9aeSAmlan Barua ptrdiff_t temp; 8899496c9aeSAmlan Barua #endif 8909496c9aeSAmlan Barua 8913564f093SHong Zhang PetscFunctionBegin; 892ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 8935b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 8945b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8950298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 8965b3e41ffSAmlan Barua 897e81bb599SAmlan Barua if (size==1) { 898b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 8996971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9006971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9016971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9026971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 903b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 904e81bb599SAmlan Barua 9053564f093SHong Zhang } else { 906e81bb599SAmlan Barua switch (ndim) { 907e81bb599SAmlan Barua case 1: 90864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 90964657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 91026fbe8dcSKarl Rupp 9114f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9124f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 91364657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 91464657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 91564657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 91664657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 91764657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 91864657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 91964657d84SAmlan Barua #else 9206a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 92164657d84SAmlan Barua #endif 9225b3e41ffSAmlan Barua break; 9235b3e41ffSAmlan Barua case 2: 924bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 925bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 92626fbe8dcSKarl Rupp 9274f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9284f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 929bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 930bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 934bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 935bd59e6c6SAmlan Barua #else 9365b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 93726fbe8dcSKarl Rupp 9385b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9395b3e41ffSAmlan Barua 9409496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9419496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9425b3e41ffSAmlan Barua 943db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 944db4deed7SKarl Rupp else NM = dim[1]+1; 9455b3e41ffSAmlan Barua 9465b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9475b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9485b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9495b3e41ffSAmlan Barua tempindx1 = i*NM + j; 95026fbe8dcSKarl Rupp 9515b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9525b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9535b3e41ffSAmlan Barua } 9545b3e41ffSAmlan Barua } 9555b3e41ffSAmlan Barua 9565b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9575b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9585b3e41ffSAmlan Barua 9595b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9605b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9615b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9625b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 963b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 964b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 965b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 966b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 967bd59e6c6SAmlan Barua #endif 9689496c9aeSAmlan Barua break; 9695b3e41ffSAmlan Barua case 3: 970bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 971bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 97226fbe8dcSKarl Rupp 9734f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9744f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 975bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 976bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 979bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 980bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua #else 982bd59e6c6SAmlan Barua 98351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98426fbe8dcSKarl Rupp 98551d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 98651d1eed7SAmlan Barua 9879496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9889496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 98951d1eed7SAmlan Barua 990db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 991db4deed7SKarl Rupp else NM = dim[2]+1; 99251d1eed7SAmlan Barua 99351d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 99451d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 99551d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 99651d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 99751d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 99826fbe8dcSKarl Rupp 99951d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 100051d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 100151d1eed7SAmlan Barua } 100251d1eed7SAmlan Barua } 100351d1eed7SAmlan Barua } 100451d1eed7SAmlan Barua 100551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 100651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 100751d1eed7SAmlan Barua 100851d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 100951d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101051d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101151d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1012b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1013b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1014b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1015b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1016bd59e6c6SAmlan Barua #endif 10179496c9aeSAmlan Barua break; 10185b3e41ffSAmlan Barua default: 1019bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1020bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 102126fbe8dcSKarl Rupp 10224f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10234f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1024bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua #else 1031ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 103226fbe8dcSKarl Rupp 1033ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 103426fbe8dcSKarl Rupp 1035ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103626fbe8dcSKarl Rupp 1037ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 103826fbe8dcSKarl Rupp 1039ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1040ba6e06d0SAmlan Barua 1041ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1042ba6e06d0SAmlan Barua 1043ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1044ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1045ba6e06d0SAmlan Barua 1046db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1047db4deed7SKarl Rupp else NM = 1; 1048ba6e06d0SAmlan Barua 1049ba6e06d0SAmlan Barua j = low; 10503564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1051ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1052ba6e06d0SAmlan Barua indx2[i] = j; 10533564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1054ba6e06d0SAmlan Barua j++; 1055ba6e06d0SAmlan Barua } 1056ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1057ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1058ba6e06d0SAmlan Barua 1059ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1060ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1063b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1064b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1065b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1066b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1067bd59e6c6SAmlan Barua #endif 10689496c9aeSAmlan Barua break; 10695b3e41ffSAmlan Barua } 1070e81bb599SAmlan Barua } 10713564f093SHong Zhang PetscFunctionReturn(0); 10725b3e41ffSAmlan Barua } 10735b3e41ffSAmlan Barua 10745b3e41ffSAmlan Barua #undef __FUNCT__ 10753c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10763c3a9c75SAmlan Barua /* 10773564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10783564f093SHong Zhang 10793c3a9c75SAmlan Barua Options Database Keys: 10803c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10813c3a9c75SAmlan Barua 10823c3a9c75SAmlan Barua Level: intermediate 10833c3a9c75SAmlan Barua 10843c3a9c75SAmlan Barua */ 10858cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1086b2b77a04SHong Zhang { 1087b2b77a04SHong Zhang PetscErrorCode ierr; 1088ce94432eSBarry Smith MPI_Comm comm; 1089b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1090b2b77a04SHong Zhang Mat_FFTW *fftw; 1091b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1092*5274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1093*5274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1094b2b77a04SHong Zhang PetscBool flg; 10954f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 109611902ff2SHong Zhang PetscMPIInt size,rank; 10979496c9aeSAmlan Barua ptrdiff_t *pdim; 10989496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10999496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11009496c9aeSAmlan Barua ptrdiff_t temp; 11018ca4c034SAmlan Barua PetscInt N1,tot_dim; 11024f48bc67SAmlan Barua #else 11034f48bc67SAmlan Barua PetscInt n1; 11049496c9aeSAmlan Barua #endif 11059496c9aeSAmlan Barua 1106b2b77a04SHong Zhang PetscFunctionBegin; 1107ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1108b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 110911902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1110c92418ccSAmlan Barua 11111878d478SAmlan Barua fftw_mpi_init(); 111211902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 111311902ff2SHong Zhang pdim[0] = dim[0]; 11148ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11158ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11168ca4c034SAmlan Barua #endif 11173564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11186882372aSHong Zhang partial_dim *= dim[ctr]; 111911902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11208ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1121db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1122db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11238ca4c034SAmlan Barua #endif 11246882372aSHong Zhang } 11253c3a9c75SAmlan Barua 1126b2b77a04SHong Zhang if (size == 1) { 1127e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1128b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1129b2b77a04SHong Zhang n = N; 1130e81bb599SAmlan Barua #else 1131e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1132e81bb599SAmlan Barua n = tot_dim; 1133e81bb599SAmlan Barua #endif 1134e81bb599SAmlan Barua 1135b2b77a04SHong Zhang } else { 11361abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1137b2b77a04SHong Zhang switch (ndim) { 1138b2b77a04SHong Zhang case 1: 11393c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11403c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1141e5d7f247SAmlan Barua #else 11429496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 114326fbe8dcSKarl Rupp 11446882372aSHong Zhang n = (PetscInt)local_n0; 11459496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11469496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1147e5d7f247SAmlan Barua #endif 1148b2b77a04SHong Zhang break; 1149b2b77a04SHong Zhang case 2: 11505b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1151b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1152b2b77a04SHong Zhang /* 1153b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11540ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1155b2b77a04SHong Zhang */ 1156b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1157b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11585b3e41ffSAmlan Barua #else 11594f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 116026fbe8dcSKarl Rupp 11615b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1162c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11635b3e41ffSAmlan Barua #endif 1164b2b77a04SHong Zhang break; 1165b2b77a04SHong Zhang case 3: 116651d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 116751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 116826fbe8dcSKarl Rupp 11696882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11706882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 117151d1eed7SAmlan Barua #else 11724f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 117326fbe8dcSKarl Rupp 117451d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1175c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 117651d1eed7SAmlan Barua #endif 1177b2b77a04SHong Zhang break; 1178b2b77a04SHong Zhang default: 1179b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 118011902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 118126fbe8dcSKarl Rupp 11826882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11836882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1184b3a17365SAmlan Barua #else 1185b3a17365SAmlan Barua temp = pdim[ndim-1]; 118626fbe8dcSKarl Rupp 1187b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 118826fbe8dcSKarl Rupp 11894f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 119026fbe8dcSKarl Rupp 1191b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1192b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 119326fbe8dcSKarl Rupp 1194b3a17365SAmlan Barua pdim[ndim-1] = temp; 119526fbe8dcSKarl Rupp 1196c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1197b3a17365SAmlan Barua #endif 1198b2b77a04SHong Zhang break; 1199b2b77a04SHong Zhang } 1200b2b77a04SHong Zhang } 1201b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1202b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1203b2b77a04SHong Zhang fft->data = (void*)fftw; 1204b2b77a04SHong Zhang 1205b2b77a04SHong Zhang fft->n = n; 12060c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1207e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 120826fbe8dcSKarl Rupp 12095e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 121026fbe8dcSKarl Rupp 1211b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1212c92418ccSAmlan Barua 1213b2b77a04SHong Zhang fftw->p_forward = 0; 1214b2b77a04SHong Zhang fftw->p_backward = 0; 1215b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1216b2b77a04SHong Zhang 1217b2b77a04SHong Zhang if (size == 1) { 1218b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1219b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1220b2b77a04SHong Zhang } else { 1221b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1222b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1223b2b77a04SHong Zhang } 1224b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1225b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12264a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 122726fbe8dcSKarl Rupp 1228bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1229bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1230bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1231b2b77a04SHong Zhang 1232b2b77a04SHong Zhang /* get runtime options */ 1233ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1234*5274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1235*5274ed1bSHong Zhang if (flg) { 1236*5274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 1237*5274ed1bSHong Zhang } 12384a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1239b2b77a04SHong Zhang PetscFunctionReturn(0); 1240b2b77a04SHong Zhang } 12413c3a9c75SAmlan Barua 12423c3a9c75SAmlan Barua 12433c3a9c75SAmlan Barua 12443c3a9c75SAmlan Barua 1245