1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 28*97504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 29*97504071SAmlan Barua 30*97504071SAmlan Barua Input parameter: 31*97504071SAmlan Barua mat - the matrix 32*97504071SAmlan Barua x - the vector on which FDFT will be performed 33*97504071SAmlan Barua 34*97504071SAmlan Barua Output parameter: 35*97504071SAmlan Barua y - vector that stores result of FDFT 36*97504071SAmlan Barua 37*97504071SAmlan Barua */ 38b2b77a04SHong Zhang #undef __FUNCT__ 39b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 40b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41b2b77a04SHong Zhang { 42b2b77a04SHong Zhang PetscErrorCode ierr; 43b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 44b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 46b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 47b2b77a04SHong Zhang 48b2b77a04SHong Zhang PetscFunctionBegin; 49b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 52b2b77a04SHong Zhang switch (ndim){ 53b2b77a04SHong Zhang case 1: 5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5658a912c5SAmlan Barua #else 5758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5858a912c5SAmlan Barua #endif 59b2b77a04SHong Zhang break; 60b2b77a04SHong Zhang case 2: 6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 62b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6358a912c5SAmlan Barua #else 6458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6558a912c5SAmlan Barua #endif 66b2b77a04SHong Zhang break; 67b2b77a04SHong Zhang case 3: 6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 69b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7058a912c5SAmlan Barua #else 7158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7258a912c5SAmlan Barua #endif 73b2b77a04SHong Zhang break; 74b2b77a04SHong Zhang default: 7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 76b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7758a912c5SAmlan Barua #else 7858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7958a912c5SAmlan Barua #endif 80b2b77a04SHong Zhang break; 81b2b77a04SHong Zhang } 82b2b77a04SHong Zhang fftw->finarray = x_array; 83b2b77a04SHong Zhang fftw->foutarray = y_array; 84b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 87b2b77a04SHong Zhang } else { /* use existing plan */ 88b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90b2b77a04SHong Zhang } else { 91b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 92b2b77a04SHong Zhang } 93b2b77a04SHong Zhang } 94b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96b2b77a04SHong Zhang PetscFunctionReturn(0); 97b2b77a04SHong Zhang } 98b2b77a04SHong Zhang 99*97504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 100*97504071SAmlan Barua 101*97504071SAmlan Barua Input parameter: 102*97504071SAmlan Barua mat - the matrix 103*97504071SAmlan Barua x - the vector on which BDFT will be performed 104*97504071SAmlan Barua 105*97504071SAmlan Barua Output parameter: 106*97504071SAmlan Barua y - vector that stores result of BDFT 107*97504071SAmlan Barua 108*97504071SAmlan Barua */ 109*97504071SAmlan Barua 110b2b77a04SHong Zhang #undef __FUNCT__ 111b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113b2b77a04SHong Zhang { 114b2b77a04SHong Zhang PetscErrorCode ierr; 115b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 116b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 118b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 119b2b77a04SHong Zhang 120b2b77a04SHong Zhang PetscFunctionBegin; 121b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 124b2b77a04SHong Zhang switch (ndim){ 125b2b77a04SHong Zhang case 1: 12658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 127b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12858a912c5SAmlan Barua #else 129e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13058a912c5SAmlan Barua #endif 131b2b77a04SHong Zhang break; 132b2b77a04SHong Zhang case 2: 13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13558a912c5SAmlan Barua #else 136e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13758a912c5SAmlan Barua #endif 138b2b77a04SHong Zhang break; 139b2b77a04SHong Zhang case 3: 14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 141b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14258a912c5SAmlan Barua #else 143e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 14458a912c5SAmlan Barua #endif 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang default: 14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14958a912c5SAmlan Barua #else 150e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 15158a912c5SAmlan Barua #endif 152b2b77a04SHong Zhang break; 153b2b77a04SHong Zhang } 154b2b77a04SHong Zhang fftw->binarray = x_array; 155b2b77a04SHong Zhang fftw->boutarray = y_array; 156b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157b2b77a04SHong Zhang } else { /* use existing plan */ 158b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160b2b77a04SHong Zhang } else { 161b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162b2b77a04SHong Zhang } 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166b2b77a04SHong Zhang PetscFunctionReturn(0); 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang 169*97504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 170*97504071SAmlan Barua 171*97504071SAmlan Barua Input parameter: 172*97504071SAmlan Barua mat - the matrix 173*97504071SAmlan Barua x - the vector on which FDFT will be performed 174*97504071SAmlan Barua 175*97504071SAmlan Barua Output parameter: 176*97504071SAmlan Barua y - vector that stores result of FDFT 177*97504071SAmlan Barua 178*97504071SAmlan Barua */ 179*97504071SAmlan Barua 180b2b77a04SHong Zhang #undef __FUNCT__ 181b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 182b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183b2b77a04SHong Zhang { 184b2b77a04SHong Zhang PetscErrorCode ierr; 185b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 186b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 188c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 189b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 190b2b77a04SHong Zhang 191b2b77a04SHong Zhang PetscFunctionBegin; 192b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 195b2b77a04SHong Zhang switch (ndim){ 196b2b77a04SHong Zhang case 1: 1973c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 198b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199ae0a50aaSHong Zhang #else 200ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2013c3a9c75SAmlan Barua #endif 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang case 2: 204*97504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2063c3a9c75SAmlan Barua #else 2073c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2083c3a9c75SAmlan Barua #endif 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang case 3: 2113c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 212b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2133c3a9c75SAmlan Barua #else 21451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2153c3a9c75SAmlan Barua #endif 216b2b77a04SHong Zhang break; 217b2b77a04SHong Zhang default: 2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 219c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2203c3a9c75SAmlan Barua #else 2213c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2223c3a9c75SAmlan Barua #endif 223b2b77a04SHong Zhang break; 224b2b77a04SHong Zhang } 225b2b77a04SHong Zhang fftw->finarray = x_array; 226b2b77a04SHong Zhang fftw->foutarray = y_array; 227b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 230b2b77a04SHong Zhang } else { /* use existing plan */ 231b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233b2b77a04SHong Zhang } else { 234b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 235b2b77a04SHong Zhang } 236b2b77a04SHong Zhang } 237b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239b2b77a04SHong Zhang PetscFunctionReturn(0); 240b2b77a04SHong Zhang } 241b2b77a04SHong Zhang 242*97504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 243*97504071SAmlan Barua 244*97504071SAmlan Barua Input parameter: 245*97504071SAmlan Barua mat - the matrix 246*97504071SAmlan Barua x - the vector on which BDFT will be performed 247*97504071SAmlan Barua 248*97504071SAmlan Barua Output parameter: 249*97504071SAmlan Barua y - vector that stores result of BDFT 250*97504071SAmlan Barua 251*97504071SAmlan Barua */ 252b2b77a04SHong Zhang #undef __FUNCT__ 253b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255b2b77a04SHong Zhang { 256b2b77a04SHong Zhang PetscErrorCode ierr; 257b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 258b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 260c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 261b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 262b2b77a04SHong Zhang 263b2b77a04SHong Zhang PetscFunctionBegin; 264b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 267b2b77a04SHong Zhang switch (ndim){ 268b2b77a04SHong Zhang case 1: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271ae0a50aaSHong Zhang #else 272ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang case 2: 276*97504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2783c3a9c75SAmlan Barua #else 2793c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2803c3a9c75SAmlan Barua #endif 281b2b77a04SHong Zhang break; 282b2b77a04SHong Zhang case 3: 2833c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 284b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2853c3a9c75SAmlan Barua #else 2863c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2873c3a9c75SAmlan Barua #endif 288b2b77a04SHong Zhang break; 289b2b77a04SHong Zhang default: 2903c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 291c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2923c3a9c75SAmlan Barua #else 2933c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2943c3a9c75SAmlan Barua #endif 295b2b77a04SHong Zhang break; 296b2b77a04SHong Zhang } 297b2b77a04SHong Zhang fftw->binarray = x_array; 298b2b77a04SHong Zhang fftw->boutarray = y_array; 299b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300b2b77a04SHong Zhang } else { /* use existing plan */ 301b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303b2b77a04SHong Zhang } else { 304b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305b2b77a04SHong Zhang } 306b2b77a04SHong Zhang } 307b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang PetscFunctionReturn(0); 310b2b77a04SHong Zhang } 311b2b77a04SHong Zhang 312b2b77a04SHong Zhang #undef __FUNCT__ 313b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 314b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 315b2b77a04SHong Zhang { 316b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 317b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318b2b77a04SHong Zhang PetscErrorCode ierr; 319b2b77a04SHong Zhang 320b2b77a04SHong Zhang PetscFunctionBegin; 321b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 322b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 323b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 325b2b77a04SHong Zhang PetscFunctionReturn(0); 326b2b77a04SHong Zhang } 327b2b77a04SHong Zhang 328c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329b2b77a04SHong Zhang #undef __FUNCT__ 330b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 331b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332b2b77a04SHong Zhang { 333b2b77a04SHong Zhang PetscErrorCode ierr; 334b2b77a04SHong Zhang PetscScalar *array; 335b2b77a04SHong Zhang 336b2b77a04SHong Zhang PetscFunctionBegin; 337b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341b2b77a04SHong Zhang PetscFunctionReturn(0); 342b2b77a04SHong Zhang } 343b2b77a04SHong Zhang 3443c3a9c75SAmlan Barua 3454f7415efSAmlan Barua #undef __FUNCT__ 3464be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3474be45526SHong Zhang /*@ 348b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3494f7415efSAmlan Barua parallel layout determined by FFTW 3504f7415efSAmlan Barua 3514f7415efSAmlan Barua Collective on Mat 3524f7415efSAmlan Barua 3534f7415efSAmlan Barua Input Parameter: 3544f7415efSAmlan Barua . mat - the matrix 3554f7415efSAmlan Barua 3564f7415efSAmlan Barua Output Parameter: 3574f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3584f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 359*97504071SAmlan Barua - bout - (optional) output vector of backward FFTW 3604f7415efSAmlan Barua 3614f7415efSAmlan Barua Level: advanced 362*97504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 363*97504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 364*97504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 365*97504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 366*97504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 367*97504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368*97504071SAmlan Barua last dimension from n to n/2+1 while invoking this routine. 3694f7415efSAmlan Barua 3704f7415efSAmlan Barua .seealso: MatCreateFFTW() 3714be45526SHong Zhang @*/ 3724be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3734be45526SHong Zhang { 3744be45526SHong Zhang PetscErrorCode ierr; 375b4c3921fSHong Zhang 3764be45526SHong Zhang PetscFunctionBegin; 3774be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3784be45526SHong Zhang PetscFunctionReturn(0); 3794be45526SHong Zhang } 3804be45526SHong Zhang 3814f7415efSAmlan Barua EXTERN_C_BEGIN 3824be45526SHong Zhang #undef __FUNCT__ 3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3844be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3854f7415efSAmlan Barua { 3864f7415efSAmlan Barua PetscErrorCode ierr; 3874f7415efSAmlan Barua PetscMPIInt size,rank; 3884f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3894f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3904f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3919496c9aeSAmlan Barua PetscInt N=fft->N; 3924f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3934f7415efSAmlan Barua 3944f7415efSAmlan Barua PetscFunctionBegin; 3954f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3964f7415efSAmlan Barua PetscValidType(A,1); 3974f7415efSAmlan Barua 3984f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 3994f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4004f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4014f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4024f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4034f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4044f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4054f7415efSAmlan Barua #else 4068ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4078ca4c034SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4088ca4c034SAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4094f7415efSAmlan Barua #endif 410*97504071SAmlan Barua } else { /* parallel cases */ 4114f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4129496c9aeSAmlan Barua ptrdiff_t local_n1; 4139496c9aeSAmlan Barua fftw_complex *data_fout; 4149496c9aeSAmlan Barua ptrdiff_t local_1_start; 4159496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4169496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4179496c9aeSAmlan Barua #else 4184f7415efSAmlan Barua double *data_finr,*data_boutr; 4199496c9aeSAmlan Barua PetscInt n1,N1,vsize; 4209496c9aeSAmlan Barua ptrdiff_t temp; 4219496c9aeSAmlan Barua #endif 4229496c9aeSAmlan Barua 4234f7415efSAmlan Barua switch (ndim){ 4244f7415efSAmlan Barua case 1: 42557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 42657625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 42757625b84SAmlan Barua #else 42857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 42957625b84SAmlan Barua if (fin) { 43057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 43157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 43257625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 43357625b84SAmlan Barua } 43457625b84SAmlan Barua if (fout) { 43557625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 43657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 43757625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 43857625b84SAmlan Barua } 43957625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44057625b84SAmlan Barua if (bout) { 44157625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44257625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 44357625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 44457625b84SAmlan Barua } 44557625b84SAmlan Barua break; 44657625b84SAmlan Barua #endif 4474f7415efSAmlan Barua case 2: 448*97504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4504f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4514f7415efSAmlan Barua if (fin) { 4524f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4534f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4544f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4554f7415efSAmlan Barua } 4564f7415efSAmlan Barua if (bout) { 4574f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4584f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4594f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4604f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4614f7415efSAmlan Barua } 462c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 46357625b84SAmlan Barua if (fout) { 46457625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4659496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 46757625b84SAmlan Barua } 4684f7415efSAmlan Barua #else 4694f7415efSAmlan Barua /* Get local size */ 4704f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4714f7415efSAmlan Barua if (fin) { 4724f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4734f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4744f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4754f7415efSAmlan Barua } 4764f7415efSAmlan Barua if (fout) { 4774f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4784f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 4814f7415efSAmlan Barua if (bout) { 4824f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4834f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4844f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4854f7415efSAmlan Barua } 4864f7415efSAmlan Barua #endif 4874f7415efSAmlan Barua break; 4884f7415efSAmlan Barua case 3: 4894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4904f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4914f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4924f7415efSAmlan Barua if (fin) { 4934f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4944f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4954f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4964f7415efSAmlan Barua } 4974f7415efSAmlan Barua if (bout) { 4984f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4994f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5004f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5014f7415efSAmlan Barua } 502c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 50357625b84SAmlan Barua if (fout) { 50457625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 50557625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 50657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50757625b84SAmlan Barua } 5084f7415efSAmlan Barua #else 5090c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5100c9b18e4SAmlan Barua if (fin) { 5110c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5120c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5130c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5140c9b18e4SAmlan Barua } 5150c9b18e4SAmlan Barua if (fout) { 5160c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5170c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5180c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5190c9b18e4SAmlan Barua } 5200c9b18e4SAmlan Barua if (bout) { 5210c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5220c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5230c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5240c9b18e4SAmlan Barua } 5254f7415efSAmlan Barua #endif 5264f7415efSAmlan Barua break; 5274f7415efSAmlan Barua default: 5284f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5294f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5304f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5314f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5324f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5334f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5344f7415efSAmlan Barua 5354f7415efSAmlan Barua if (fin) { 5364f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5374f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5384f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5394f7415efSAmlan Barua } 5404f7415efSAmlan Barua if (bout) { 5414f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5424f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5439496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5444f7415efSAmlan Barua } 545c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 546c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 547c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 54857625b84SAmlan Barua if (fout) { 54957625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 55057625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55257625b84SAmlan Barua } 5534f7415efSAmlan Barua #else 5540c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5550c9b18e4SAmlan Barua if (fin) { 5560c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5570c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5580c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5590c9b18e4SAmlan Barua } 5600c9b18e4SAmlan Barua if (fout) { 5610c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5620c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5630c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5640c9b18e4SAmlan Barua } 5650c9b18e4SAmlan Barua if (bout) { 5660c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5670c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5680c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5690c9b18e4SAmlan Barua } 5704f7415efSAmlan Barua #endif 5714f7415efSAmlan Barua break; 5724f7415efSAmlan Barua } 5734f7415efSAmlan Barua } 5744f7415efSAmlan Barua PetscFunctionReturn(0); 5754f7415efSAmlan Barua } 5764f7415efSAmlan Barua EXTERN_C_END 5774f7415efSAmlan Barua 578c92418ccSAmlan Barua #undef __FUNCT__ 579b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 580b2b77a04SHong Zhang /* 581b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 582b2b77a04SHong Zhang parallel layout determined by FFTW 583b2b77a04SHong Zhang 584b2b77a04SHong Zhang Collective on Mat 585b2b77a04SHong Zhang 586b2b77a04SHong Zhang Input Parameter: 587b2b77a04SHong Zhang . mat - the matrix 588b2b77a04SHong Zhang 589b2b77a04SHong Zhang Output Parameter: 590b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 591b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 592b2b77a04SHong Zhang 593b2b77a04SHong Zhang Level: advanced 594b2b77a04SHong Zhang 595b2b77a04SHong Zhang .seealso: MatCreateFFTW() 596b2b77a04SHong Zhang */ 597b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 598b2b77a04SHong Zhang { 599b2b77a04SHong Zhang PetscErrorCode ierr; 600b2b77a04SHong Zhang PetscMPIInt size,rank; 601b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 602b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 603c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6049496c9aeSAmlan Barua PetscInt N=fft->N; 605e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 606b2b77a04SHong Zhang 607b2b77a04SHong Zhang PetscFunctionBegin; 608b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 609b2b77a04SHong Zhang PetscValidType(A,1); 610b2b77a04SHong Zhang 611b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 612b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 613b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 614e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 615b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 616b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 617e5d7f247SAmlan Barua #else 618e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 619e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 620e81bb599SAmlan Barua #endif 621b2b77a04SHong Zhang } else { /* mpi case */ 622b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6236882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 624b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 6259496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 62651d1eed7SAmlan Barua double *data_finr ; 627b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 6289496c9aeSAmlan Barua PetscInt vsize,n1,N1; 6299496c9aeSAmlan Barua #endif 6309496c9aeSAmlan Barua 631c92418ccSAmlan Barua // PetscInt ctr; 632c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 633c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 634c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 63511902ff2SHong Zhang 636c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 637f76f76beSAmlan Barua // {k 638c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 639c92418ccSAmlan Barua // } 640b2b77a04SHong Zhang 641f76f76beSAmlan Barua 642f76f76beSAmlan Barua 643b2b77a04SHong Zhang switch (ndim){ 644b2b77a04SHong Zhang case 1: 6456882372aSHong Zhang /* Get local size */ 6466882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6476882372aSHong Zhang if (fin) { 6486882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6496882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6506882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6516882372aSHong Zhang } 6526882372aSHong Zhang if (fout) { 6536882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 65457625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6556882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6566882372aSHong Zhang } 657b2b77a04SHong Zhang break; 658b2b77a04SHong Zhang case 2: 6593c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6603c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6613c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6623c3a9c75SAmlan Barua if (fin) { 6633c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 66454dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6653c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 66609dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6673c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6683c3a9c75SAmlan Barua } 66957625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 67057625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 671b4c3921fSHong Zhang //printf("The values are %ld\n",local_n1); 6723c3a9c75SAmlan Barua if (fout) { 67309dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 67409dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6753c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6763c3a9c75SAmlan Barua } 6773c3a9c75SAmlan Barua 6783c3a9c75SAmlan Barua #else 679b2b77a04SHong Zhang /* Get local size */ 68064657d84SAmlan Barua //printf("Hope this does not come here"); 681b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 682b2b77a04SHong Zhang if (fin) { 683b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 684b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 685b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 686b2b77a04SHong Zhang } 687b2b77a04SHong Zhang if (fout) { 688b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 689b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 690b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 691b2b77a04SHong Zhang } 69264657d84SAmlan Barua //printf("Hope this does not come here"); 6933c3a9c75SAmlan Barua #endif 694b2b77a04SHong Zhang break; 695b2b77a04SHong Zhang case 3: 6966882372aSHong Zhang /* Get local size */ 6973c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 69851d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 69951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 70051d1eed7SAmlan Barua if (fin) { 70151d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 70251d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 70351d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 70451d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 70551d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 70651d1eed7SAmlan Barua } 70757625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 70851d1eed7SAmlan Barua if (fout) { 70951d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 71051d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 71151d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 71251d1eed7SAmlan Barua } 71351d1eed7SAmlan Barua 71451d1eed7SAmlan Barua 7153c3a9c75SAmlan Barua #else 7166882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 71711902ff2SHong Zhang // printf("The quantity n is %d",n); 7186882372aSHong Zhang if (fin) { 7196882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7206882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7216882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7226882372aSHong Zhang } 7236882372aSHong Zhang if (fout) { 7246882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7256882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7266882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7276882372aSHong Zhang } 7283c3a9c75SAmlan Barua #endif 729b2b77a04SHong Zhang break; 730b2b77a04SHong Zhang default: 7316882372aSHong Zhang /* Get local size */ 7323c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 733b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 734b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 735b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 736b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 737b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 738b3a17365SAmlan Barua if (fin) { 739b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 740b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 741b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 742b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 743b3a17365SAmlan Barua } 74457625b84SAmlan Barua temp = fftw->partial_dim; 74557625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 74657625b84SAmlan Barua 74757625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 748b3a17365SAmlan Barua if (fout) { 749b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 75057625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 751b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 752b3a17365SAmlan Barua } 753b3a17365SAmlan Barua 7543c3a9c75SAmlan Barua #else 755c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 7566882372aSHong Zhang if (fin) { 7576882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7586882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7596882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7606882372aSHong Zhang } 7616882372aSHong Zhang if (fout) { 7626882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7636882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7646882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7656882372aSHong Zhang } 7663c3a9c75SAmlan Barua #endif 767b2b77a04SHong Zhang break; 768b2b77a04SHong Zhang } 769b2b77a04SHong Zhang } 77054dd5118SAmlan Barua // if (fin){ 77154dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 77254dd5118SAmlan Barua // } 77354dd5118SAmlan Barua // if (fout){ 77454dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 77554dd5118SAmlan Barua // } 776b2b77a04SHong Zhang PetscFunctionReturn(0); 777b2b77a04SHong Zhang } 778b2b77a04SHong Zhang 779b2b77a04SHong Zhang #undef __FUNCT__ 78074a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 78174a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 7823c3a9c75SAmlan Barua { 7833c3a9c75SAmlan Barua PetscErrorCode ierr; 7843c3a9c75SAmlan Barua PetscFunctionBegin; 78574a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7863c3a9c75SAmlan Barua PetscFunctionReturn(0); 7873c3a9c75SAmlan Barua } 78854efbe56SHong Zhang 789b2b77a04SHong Zhang /* 790*97504071SAmlan Barua VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 7913c3a9c75SAmlan Barua Input A, x, y 7923c3a9c75SAmlan Barua A - FFTW matrix 79354dd5118SAmlan Barua x - user data 794b2b77a04SHong Zhang Options Database Keys: 795b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 796b2b77a04SHong Zhang 797b2b77a04SHong Zhang Level: intermediate 798*97504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 799*97504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 800*97504071SAmlan Barua zeros. This routine does that job by scattering operation. 801*97504071SAmlan Barua 802b2b77a04SHong Zhang 803b2b77a04SHong Zhang */ 8043c3a9c75SAmlan Barua 8053c3a9c75SAmlan Barua EXTERN_C_BEGIN 8063c3a9c75SAmlan Barua #undef __FUNCT__ 80774a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 80874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 8093c3a9c75SAmlan Barua { 8103c3a9c75SAmlan Barua PetscErrorCode ierr; 8113c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8123c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8133c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8149496c9aeSAmlan Barua PetscInt N=fft->N; 815b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 8169496c9aeSAmlan Barua PetscInt low; 8178ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 8183c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8199496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8203c3a9c75SAmlan Barua VecScatter vecscat; 8213c3a9c75SAmlan Barua IS list1,list2; 8229496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8239496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8249496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8259496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8269496c9aeSAmlan Barua ptrdiff_t temp; 8279496c9aeSAmlan Barua #endif 8283c3a9c75SAmlan Barua 829f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 830f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8313c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 8323c3a9c75SAmlan Barua 833e81bb599SAmlan Barua if (size==1) 834e81bb599SAmlan Barua { 8358ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 8368ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 837b4c3921fSHong Zhang //printf("The values of sizes are %d and %d",vsize,vsize1); 8389496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 8396971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8406971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8416971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8426971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 843b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 844e81bb599SAmlan Barua } 845e81bb599SAmlan Barua 846e81bb599SAmlan Barua else{ 847e81bb599SAmlan Barua 8483c3a9c75SAmlan Barua switch (ndim){ 8493c3a9c75SAmlan Barua case 1: 85064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 85164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 85264657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 85364657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 85464657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 85564657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 85664657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 85764657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 85864657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 85964657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 86064657d84SAmlan Barua #else 861e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 86264657d84SAmlan Barua #endif 8633c3a9c75SAmlan Barua break; 8643c3a9c75SAmlan Barua case 2: 865bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 866bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 867bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 868bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 869bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 870bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 873bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 874bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 875bd59e6c6SAmlan Barua #else 8763c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 8773c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 8789496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 8799496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 8809496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 8819496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 8823c3a9c75SAmlan Barua 8833c3a9c75SAmlan Barua if (dim[1]%2==0) 8843c3a9c75SAmlan Barua NM = dim[1]+2; 8853c3a9c75SAmlan Barua else 8863c3a9c75SAmlan Barua NM = dim[1]+1; 8873c3a9c75SAmlan Barua 8883c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 8893c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 8903c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 8913c3a9c75SAmlan Barua tempindx1 = i*NM + j; 8925b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8933c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 8943c3a9c75SAmlan Barua } 8953c3a9c75SAmlan Barua } 8963c3a9c75SAmlan Barua 8973c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8983c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8993c3a9c75SAmlan Barua 900f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 901f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 904b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 905b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 906b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 907b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 908bd59e6c6SAmlan Barua #endif 9099496c9aeSAmlan Barua break; 9103c3a9c75SAmlan Barua 9113c3a9c75SAmlan Barua case 3: 912bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 913bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 914bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 915bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 916bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 917bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 920bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 921bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 922bd59e6c6SAmlan Barua #else 92351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 92451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 92551d1eed7SAmlan Barua 9269496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9279496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 92851d1eed7SAmlan Barua 92951d1eed7SAmlan Barua if (dim[2]%2==0) 93051d1eed7SAmlan Barua NM = dim[2]+2; 93151d1eed7SAmlan Barua else 93251d1eed7SAmlan Barua NM = dim[2]+1; 93351d1eed7SAmlan Barua 93451d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 93551d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 93651d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 93751d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 93851d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 93951d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 94051d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 94151d1eed7SAmlan Barua } 94251d1eed7SAmlan Barua } 94351d1eed7SAmlan Barua } 94451d1eed7SAmlan Barua 94551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 94651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 94751d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 94851d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94951d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95051d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 951b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 952b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 953b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 954b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 955bd59e6c6SAmlan Barua #endif 9569496c9aeSAmlan Barua break; 9573c3a9c75SAmlan Barua 9583c3a9c75SAmlan Barua default: 959bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 960bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 961bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 962bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 963bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 964bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 967bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 968bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 969bd59e6c6SAmlan Barua #else 970e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 971e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 972e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 973e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 974e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 975e5d7f247SAmlan Barua 976e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 977e5d7f247SAmlan Barua 978e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 979e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 980e5d7f247SAmlan Barua 981e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 982ba6e06d0SAmlan Barua NM = 2; 983e5d7f247SAmlan Barua else 984ba6e06d0SAmlan Barua NM = 1; 985e5d7f247SAmlan Barua 9866971673cSAmlan Barua j = low; 9876971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 9886971673cSAmlan Barua { 9896971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 9906971673cSAmlan Barua indx2[i] = j; 9916971673cSAmlan Barua if (k%dim[ndim-1]==0) 9926971673cSAmlan Barua { j+=NM;} 9936971673cSAmlan Barua j++; 9946971673cSAmlan Barua } 9956971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9966971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9976971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 9986971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9996971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10006971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1001b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1002b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1003b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1004b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1005bd59e6c6SAmlan Barua #endif 10069496c9aeSAmlan Barua break; 10073c3a9c75SAmlan Barua } 1008e81bb599SAmlan Barua } 10091abc6020SAmlan Barua return(0); 10103c3a9c75SAmlan Barua } 10113c3a9c75SAmlan Barua EXTERN_C_END 10123c3a9c75SAmlan Barua 10133c3a9c75SAmlan Barua #undef __FUNCT__ 101474a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 101574a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 10163c3a9c75SAmlan Barua { 10173c3a9c75SAmlan Barua PetscErrorCode ierr; 10183c3a9c75SAmlan Barua PetscFunctionBegin; 101974a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 10203c3a9c75SAmlan Barua PetscFunctionReturn(0); 10213c3a9c75SAmlan Barua } 102254efbe56SHong Zhang 10235b3e41ffSAmlan Barua /* 1024*97504071SAmlan Barua VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 10255b3e41ffSAmlan Barua Input A, x, y 10265b3e41ffSAmlan Barua A - FFTW matrix 10275b3e41ffSAmlan Barua x - FFTW vector 10285b3e41ffSAmlan Barua y - PETSc vector that user can read 10295b3e41ffSAmlan Barua 10305b3e41ffSAmlan Barua Level: intermediate 1031*97504071SAmlan Barua Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 1032*97504071SAmlan Barua VecScatterFFTWToPetsc removes those extra zeros. 10335b3e41ffSAmlan Barua 10343c3a9c75SAmlan Barua */ 10353c3a9c75SAmlan Barua 10363c3a9c75SAmlan Barua EXTERN_C_BEGIN 10373c3a9c75SAmlan Barua #undef __FUNCT__ 103874a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 103974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 10405b3e41ffSAmlan Barua { 10415b3e41ffSAmlan Barua PetscErrorCode ierr; 10425b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 10435b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 10445b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 10459496c9aeSAmlan Barua PetscInt N=fft->N; 1046b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 10479496c9aeSAmlan Barua PetscInt low; 10489496c9aeSAmlan Barua PetscInt rank,size; 10495b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 10509496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10515b3e41ffSAmlan Barua VecScatter vecscat; 10525b3e41ffSAmlan Barua IS list1,list2; 10539496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10549496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 10559496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 10569496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 10579496c9aeSAmlan Barua ptrdiff_t temp; 10589496c9aeSAmlan Barua #endif 10599496c9aeSAmlan Barua 10605b3e41ffSAmlan Barua 10615b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 10625b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1063b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 10645b3e41ffSAmlan Barua 1065e81bb599SAmlan Barua if (size==1){ 1066b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 10676971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 10686971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10696971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10706971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1071b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1072e81bb599SAmlan Barua } 1073e81bb599SAmlan Barua else{ 1074e81bb599SAmlan Barua 1075e81bb599SAmlan Barua switch (ndim){ 1076e81bb599SAmlan Barua case 1: 107764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107864657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 10799496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 10809496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 108164657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 108264657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 108364657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 108464657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 108564657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 108664657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 108764657d84SAmlan Barua #else 10886a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 108964657d84SAmlan Barua #endif 10905b3e41ffSAmlan Barua break; 10915b3e41ffSAmlan Barua case 2: 1092bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1093bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1094bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1095bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1096bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1097bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1098bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1100bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1101bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1102bd59e6c6SAmlan Barua #else 11035b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 11045b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 11055b3e41ffSAmlan Barua 11069496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 11079496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 11085b3e41ffSAmlan Barua 11095b3e41ffSAmlan Barua if (dim[1]%2==0) 11105b3e41ffSAmlan Barua NM = dim[1]+2; 11115b3e41ffSAmlan Barua else 11125b3e41ffSAmlan Barua NM = dim[1]+1; 11135b3e41ffSAmlan Barua 11145b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 11155b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 11165b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 11175b3e41ffSAmlan Barua tempindx1 = i*NM + j; 11185b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 11195b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 11205b3e41ffSAmlan Barua } 11215b3e41ffSAmlan Barua } 11225b3e41ffSAmlan Barua 11235b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 11245b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 11255b3e41ffSAmlan Barua 11265b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 11275b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11285b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11295b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1130b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1131b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1132b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1133b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1134bd59e6c6SAmlan Barua #endif 11359496c9aeSAmlan Barua break; 11365b3e41ffSAmlan Barua case 3: 1137bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1138bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1139bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1140bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1141bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1142bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1145bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1146bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1147bd59e6c6SAmlan Barua #else 1148bd59e6c6SAmlan Barua 114951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 115051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 115151d1eed7SAmlan Barua 11529496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 11539496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 11549496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 11559496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 115651d1eed7SAmlan Barua 115751d1eed7SAmlan Barua if (dim[2]%2==0) 115851d1eed7SAmlan Barua NM = dim[2]+2; 115951d1eed7SAmlan Barua else 116051d1eed7SAmlan Barua NM = dim[2]+1; 116151d1eed7SAmlan Barua 116251d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 116351d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 116451d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 116551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 116651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 116751d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 116851d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 116951d1eed7SAmlan Barua } 117051d1eed7SAmlan Barua } 117151d1eed7SAmlan Barua } 117251d1eed7SAmlan Barua 117351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 117451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 117551d1eed7SAmlan Barua 117651d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 117751d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 117851d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 117951d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1180b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1181b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1182b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1183b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1184bd59e6c6SAmlan Barua #endif 11859496c9aeSAmlan Barua break; 11865b3e41ffSAmlan Barua default: 1187bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1188bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1189bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1190bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1191bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1192bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1193bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1194bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1195bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1196bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1197bd59e6c6SAmlan Barua #else 1198ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1199ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1200ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1201ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1202ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1203ba6e06d0SAmlan Barua 1204ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1205ba6e06d0SAmlan Barua 1206ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1207ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1208ba6e06d0SAmlan Barua 1209ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1210ba6e06d0SAmlan Barua NM = 2; 1211ba6e06d0SAmlan Barua else 1212ba6e06d0SAmlan Barua NM = 1; 1213ba6e06d0SAmlan Barua 1214ba6e06d0SAmlan Barua j = low; 1215ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1216ba6e06d0SAmlan Barua { 1217ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1218ba6e06d0SAmlan Barua indx2[i] = j; 1219ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1220ba6e06d0SAmlan Barua { j+=NM;} 1221ba6e06d0SAmlan Barua j++; 1222ba6e06d0SAmlan Barua } 1223ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1224ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1225ba6e06d0SAmlan Barua 1226ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1227ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1230b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1231b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1232b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1233b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1234bd59e6c6SAmlan Barua #endif 12359496c9aeSAmlan Barua break; 12365b3e41ffSAmlan Barua } 1237e81bb599SAmlan Barua } 12385b3e41ffSAmlan Barua return 0; 12395b3e41ffSAmlan Barua } 12405b3e41ffSAmlan Barua EXTERN_C_END 12415b3e41ffSAmlan Barua 12425b3e41ffSAmlan Barua EXTERN_C_BEGIN 12435b3e41ffSAmlan Barua #undef __FUNCT__ 12443c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 12453c3a9c75SAmlan Barua /* 12463c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 12473c3a9c75SAmlan Barua via the external package FFTW 12483c3a9c75SAmlan Barua Options Database Keys: 12493c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 12503c3a9c75SAmlan Barua 12513c3a9c75SAmlan Barua Level: intermediate 12523c3a9c75SAmlan Barua 12533c3a9c75SAmlan Barua */ 12543c3a9c75SAmlan Barua 1255b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1256b2b77a04SHong Zhang { 1257b2b77a04SHong Zhang PetscErrorCode ierr; 1258b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1259b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1260b2b77a04SHong Zhang Mat_FFTW *fftw; 1261b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1262b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1263b2b77a04SHong Zhang PetscBool flg; 12644f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 126511902ff2SHong Zhang PetscMPIInt size,rank; 12669496c9aeSAmlan Barua ptrdiff_t *pdim; 12679496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 12689496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12699496c9aeSAmlan Barua ptrdiff_t temp; 12708ca4c034SAmlan Barua PetscInt N1,tot_dim; 12714f48bc67SAmlan Barua #else 12724f48bc67SAmlan Barua PetscInt n1; 12739496c9aeSAmlan Barua #endif 12749496c9aeSAmlan Barua 1275b2b77a04SHong Zhang PetscFunctionBegin; 1276b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 127711902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1278c92418ccSAmlan Barua 127911902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 128011902ff2SHong Zhang pdim[0] = dim[0]; 12818ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12828ca4c034SAmlan Barua tot_dim = 2*dim[0]; 12838ca4c034SAmlan Barua #endif 128411902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 128511902ff2SHong Zhang { 12866882372aSHong Zhang partial_dim *= dim[ctr]; 128711902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12888ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12898ca4c034SAmlan Barua if (ctr==ndim-1) 12908ca4c034SAmlan Barua tot_dim *= (dim[ctr]/2+1); 12918ca4c034SAmlan Barua else 12928ca4c034SAmlan Barua tot_dim *= dim[ctr]; 12938ca4c034SAmlan Barua #endif 12946882372aSHong Zhang } 12953c3a9c75SAmlan Barua 1296b2b77a04SHong Zhang if (size == 1) { 1297e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1298b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1299b2b77a04SHong Zhang n = N; 1300e81bb599SAmlan Barua #else 1301e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1302e81bb599SAmlan Barua n = tot_dim; 1303e81bb599SAmlan Barua #endif 1304e81bb599SAmlan Barua 1305b2b77a04SHong Zhang } else { 13061abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1307b2b77a04SHong Zhang switch (ndim){ 1308b2b77a04SHong Zhang case 1: 13093c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 13103c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1311e5d7f247SAmlan Barua #else 13129496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 13136882372aSHong Zhang n = (PetscInt)local_n0; 13149496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 13159496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1316e5d7f247SAmlan Barua #endif 1317b2b77a04SHong Zhang break; 1318b2b77a04SHong Zhang case 2: 13195b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1320b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1321b2b77a04SHong Zhang /* 1322b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1323b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1324b2b77a04SHong Zhang */ 1325b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1326b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 13275b3e41ffSAmlan Barua #else 13285b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 13295b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1330c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 13315b3e41ffSAmlan Barua #endif 1332b2b77a04SHong Zhang break; 1333b2b77a04SHong Zhang case 3: 133451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 133551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 13366882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 13376882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 133851d1eed7SAmlan Barua #else 133951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 134051d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1341c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 134251d1eed7SAmlan Barua #endif 1343b2b77a04SHong Zhang break; 1344b2b77a04SHong Zhang default: 1345b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 134611902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 13476882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 13486882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1349b3a17365SAmlan Barua #else 1350b3a17365SAmlan Barua temp = pdim[ndim-1]; 1351b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1352b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1353b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1354b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1355b3a17365SAmlan Barua pdim[ndim-1] = temp; 1356c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1357b3a17365SAmlan Barua #endif 1358b2b77a04SHong Zhang break; 1359b2b77a04SHong Zhang } 1360b2b77a04SHong Zhang } 1361b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1362b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1363b2b77a04SHong Zhang fft->data = (void*)fftw; 1364b2b77a04SHong Zhang 1365b2b77a04SHong Zhang fft->n = n; 1366c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1367e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1368c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1369b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1370c92418ccSAmlan Barua 1371b2b77a04SHong Zhang fftw->p_forward = 0; 1372b2b77a04SHong Zhang fftw->p_backward = 0; 1373b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1374b2b77a04SHong Zhang 1375b2b77a04SHong Zhang if (size == 1){ 1376b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1377b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1378b2b77a04SHong Zhang } else { 1379b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1380b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1381b2b77a04SHong Zhang } 1382b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 13834be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1384b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13854be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 138674a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 138774a26884SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1388b2b77a04SHong Zhang 1389b2b77a04SHong Zhang /* get runtime options */ 1390b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1391b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1392b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1393b2b77a04SHong Zhang PetscOptionsEnd(); 1394b2b77a04SHong Zhang PetscFunctionReturn(0); 1395b2b77a04SHong Zhang } 1396b2b77a04SHong Zhang EXTERN_C_END 13973c3a9c75SAmlan Barua 13983c3a9c75SAmlan Barua 13993c3a9c75SAmlan Barua 14003c3a9c75SAmlan Barua 1401