1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 3033c3a9c75SAmlan Barua 3044f7415efSAmlan Barua #undef __FUNCT__ 3054be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3064be45526SHong Zhang /*@ 307b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3084f7415efSAmlan Barua parallel layout determined by FFTW 3094f7415efSAmlan Barua 3104f7415efSAmlan Barua Collective on Mat 3114f7415efSAmlan Barua 3124f7415efSAmlan Barua Input Parameter: 3134f7415efSAmlan Barua . mat - the matrix 3144f7415efSAmlan Barua 3154f7415efSAmlan Barua Output Parameter: 3164f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3174f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3184f7415efSAmlan Barua 3194f7415efSAmlan Barua Level: advanced 3204f7415efSAmlan Barua 3214f7415efSAmlan Barua .seealso: MatCreateFFTW() 3224be45526SHong Zhang @*/ 3234be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3244be45526SHong Zhang { 3254be45526SHong Zhang PetscErrorCode ierr; 3264be45526SHong Zhang PetscFunctionBegin; 3274be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3284be45526SHong Zhang PetscFunctionReturn(0); 3294be45526SHong Zhang } 3304be45526SHong Zhang 3314f7415efSAmlan Barua EXTERN_C_BEGIN 3324be45526SHong Zhang #undef __FUNCT__ 3334be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3344be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3354f7415efSAmlan Barua { 3364f7415efSAmlan Barua PetscErrorCode ierr; 3374f7415efSAmlan Barua PetscMPIInt size,rank; 3384f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3394f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3404f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3419496c9aeSAmlan Barua PetscInt N=fft->N; 3424f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3434f7415efSAmlan Barua 3444f7415efSAmlan Barua PetscFunctionBegin; 3454f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3464f7415efSAmlan Barua PetscValidType(A,1); 3474f7415efSAmlan Barua 3484f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 3494f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 3504f7415efSAmlan Barua if (size == 1){ /* sequential case */ 3514f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 3524f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 3534f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 3544f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 3554f7415efSAmlan Barua #else 3568ca4c034SAmlan Barua // if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 3578ca4c034SAmlan Barua // if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 3588ca4c034SAmlan Barua // if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 3598ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 3608ca4c034SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 3618ca4c034SAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 3624f7415efSAmlan Barua #endif 3634f7415efSAmlan Barua } else { 3644f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 3659496c9aeSAmlan Barua ptrdiff_t local_n1; 3669496c9aeSAmlan Barua fftw_complex *data_fout; 3679496c9aeSAmlan Barua ptrdiff_t local_1_start; 3689496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 3699496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 3709496c9aeSAmlan Barua #else 3714f7415efSAmlan Barua double *data_finr,*data_boutr; 3729496c9aeSAmlan Barua PetscInt n1,N1,vsize; 3739496c9aeSAmlan Barua ptrdiff_t temp; 3749496c9aeSAmlan Barua #endif 3759496c9aeSAmlan Barua 3764f7415efSAmlan Barua switch (ndim){ 3774f7415efSAmlan Barua case 1: 37857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 37957625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 38057625b84SAmlan Barua #else 3819496c9aeSAmlan Barua //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 38257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 38357625b84SAmlan Barua if (fin) { 38457625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 38557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 38657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 38757625b84SAmlan Barua } 38857625b84SAmlan Barua if (fout) { 38957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 39057625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 39157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 39257625b84SAmlan Barua } 39357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 39457625b84SAmlan Barua if (bout) { 39557625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 39657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 39757625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 39857625b84SAmlan Barua } 39957625b84SAmlan Barua break; 40057625b84SAmlan Barua #endif 4014f7415efSAmlan Barua case 2: 4024f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4034f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4044f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4054f7415efSAmlan Barua if (fin) { 4064f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4074f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4084f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4094f7415efSAmlan Barua } 4104f7415efSAmlan Barua if (bout) { 4114f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4124f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4134f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4144f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4154f7415efSAmlan Barua } 416c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 41757625b84SAmlan Barua if (fout) { 41857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4199496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 42057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 42157625b84SAmlan Barua } 4224f7415efSAmlan Barua #else 4234f7415efSAmlan Barua /* Get local size */ 4244f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4254f7415efSAmlan Barua if (fin) { 4264f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4274f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4284f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4294f7415efSAmlan Barua } 4304f7415efSAmlan Barua if (fout) { 4314f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4324f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4334f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4344f7415efSAmlan Barua } 4354f7415efSAmlan Barua if (bout) { 4364f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4374f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4384f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4394f7415efSAmlan Barua } 4404f7415efSAmlan Barua #endif 4414f7415efSAmlan Barua break; 4424f7415efSAmlan Barua case 3: 4434f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4444f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4454f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4464f7415efSAmlan Barua if (fin) { 4474f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4484f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4494f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4504f7415efSAmlan Barua } 4514f7415efSAmlan Barua if (bout) { 4524f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4534f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4544f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4554f7415efSAmlan Barua } 456c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 45757625b84SAmlan Barua if (fout) { 45857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45957625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 46157625b84SAmlan Barua } 4624f7415efSAmlan Barua #else 4630c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 4640c9b18e4SAmlan Barua if (fin) { 4650c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4660c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4670c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4680c9b18e4SAmlan Barua } 4690c9b18e4SAmlan Barua if (fout) { 4700c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4710c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4720c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4730c9b18e4SAmlan Barua } 4740c9b18e4SAmlan Barua if (bout) { 4750c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4760c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4770c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4780c9b18e4SAmlan Barua } 4794f7415efSAmlan Barua #endif 4804f7415efSAmlan Barua break; 4814f7415efSAmlan Barua default: 4824f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4834f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 4844f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 4854f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4864f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 4874f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 4884f7415efSAmlan Barua 4894f7415efSAmlan Barua if (fin) { 4904f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4914f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4924f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4934f7415efSAmlan Barua } 4944f7415efSAmlan Barua if (bout) { 4954f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4964f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4979496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4984f7415efSAmlan Barua } 499c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 500c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 501c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 50257625b84SAmlan Barua if (fout) { 50357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 50457625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 50557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50657625b84SAmlan Barua } 5074f7415efSAmlan Barua #else 5080c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5090c9b18e4SAmlan Barua if (fin) { 5100c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5110c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5120c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5130c9b18e4SAmlan Barua } 5140c9b18e4SAmlan Barua if (fout) { 5150c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5160c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5170c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5180c9b18e4SAmlan Barua } 5190c9b18e4SAmlan Barua if (bout) { 5200c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5210c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5220c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5230c9b18e4SAmlan Barua } 5244f7415efSAmlan Barua #endif 5254f7415efSAmlan Barua break; 5264f7415efSAmlan Barua } 5274f7415efSAmlan Barua } 5284f7415efSAmlan Barua PetscFunctionReturn(0); 5294f7415efSAmlan Barua } 5304f7415efSAmlan Barua EXTERN_C_END 5314f7415efSAmlan Barua 532c92418ccSAmlan Barua #undef __FUNCT__ 533b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 534b2b77a04SHong Zhang /* 535b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 536b2b77a04SHong Zhang parallel layout determined by FFTW 537b2b77a04SHong Zhang 538b2b77a04SHong Zhang Collective on Mat 539b2b77a04SHong Zhang 540b2b77a04SHong Zhang Input Parameter: 541b2b77a04SHong Zhang . mat - the matrix 542b2b77a04SHong Zhang 543b2b77a04SHong Zhang Output Parameter: 544b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 545b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 546b2b77a04SHong Zhang 547b2b77a04SHong Zhang Level: advanced 548b2b77a04SHong Zhang 549b2b77a04SHong Zhang .seealso: MatCreateFFTW() 550b2b77a04SHong Zhang */ 551b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 552b2b77a04SHong Zhang { 553b2b77a04SHong Zhang PetscErrorCode ierr; 554b2b77a04SHong Zhang PetscMPIInt size,rank; 555b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 556b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 557c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 5589496c9aeSAmlan Barua PetscInt N=fft->N; 559e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 560b2b77a04SHong Zhang 561b2b77a04SHong Zhang PetscFunctionBegin; 562b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 563b2b77a04SHong Zhang PetscValidType(A,1); 564b2b77a04SHong Zhang 565b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 566b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 567b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 568e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 569b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 570b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 571e5d7f247SAmlan Barua #else 572e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 573e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 574e81bb599SAmlan Barua #endif 575b2b77a04SHong Zhang } else { /* mpi case */ 576b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 5776882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 578b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 5799496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 58051d1eed7SAmlan Barua double *data_finr ; 581b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 5829496c9aeSAmlan Barua PetscInt vsize,n1,N1; 5839496c9aeSAmlan Barua #endif 5849496c9aeSAmlan Barua 585c92418ccSAmlan Barua // PetscInt ctr; 586c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 587c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 588c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 58911902ff2SHong Zhang 590c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 591f76f76beSAmlan Barua // {k 592c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 593c92418ccSAmlan Barua // } 594b2b77a04SHong Zhang 595f76f76beSAmlan Barua 596f76f76beSAmlan Barua 597b2b77a04SHong Zhang switch (ndim){ 598b2b77a04SHong Zhang case 1: 5996882372aSHong Zhang /* Get local size */ 600e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 601c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6026882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6039496c9aeSAmlan Barua printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 6046882372aSHong Zhang if (fin) { 6056882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6066882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6076882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6086882372aSHong Zhang } 6096882372aSHong Zhang if (fout) { 6106882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 61157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6126882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6136882372aSHong Zhang } 614b2b77a04SHong Zhang break; 615b2b77a04SHong Zhang case 2: 6163c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6173c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6183c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6193c3a9c75SAmlan Barua if (fin) { 6203c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 62154dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6223c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 62309dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6243c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6253c3a9c75SAmlan Barua } 62657625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 62757625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 62857625b84SAmlan Barua printf("The values are %ld\n",local_n1); 6293c3a9c75SAmlan Barua if (fout) { 63009dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 63109dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6323c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6333c3a9c75SAmlan Barua } 634f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 6353c3a9c75SAmlan Barua 6363c3a9c75SAmlan Barua #else 637b2b77a04SHong Zhang /* Get local size */ 63864657d84SAmlan Barua //printf("Hope this does not come here"); 639b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 640b2b77a04SHong Zhang if (fin) { 641b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 642b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 643b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 644b2b77a04SHong Zhang } 645b2b77a04SHong Zhang if (fout) { 646b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 647b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 648b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 649b2b77a04SHong Zhang } 65064657d84SAmlan Barua //printf("Hope this does not come here"); 6513c3a9c75SAmlan Barua #endif 652b2b77a04SHong Zhang break; 653b2b77a04SHong Zhang case 3: 6546882372aSHong Zhang /* Get local size */ 6553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 65651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 65751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 65851d1eed7SAmlan Barua if (fin) { 65951d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 66051d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 66151d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 66251d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 66351d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 66451d1eed7SAmlan Barua } 66557625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 66651d1eed7SAmlan Barua if (fout) { 66751d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 66851d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 66951d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 67051d1eed7SAmlan Barua } 67151d1eed7SAmlan Barua 67251d1eed7SAmlan Barua 6733c3a9c75SAmlan Barua #else 6746882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 67511902ff2SHong Zhang // printf("The quantity n is %d",n); 6766882372aSHong Zhang if (fin) { 6776882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6786882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6796882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6806882372aSHong Zhang } 6816882372aSHong Zhang if (fout) { 6826882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6836882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6846882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6856882372aSHong Zhang } 6863c3a9c75SAmlan Barua #endif 687b2b77a04SHong Zhang break; 688b2b77a04SHong Zhang default: 6896882372aSHong Zhang /* Get local size */ 6903c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 691b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 692b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 693b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 694b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 695b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 696b3a17365SAmlan Barua if (fin) { 697b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 698b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 699b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 700b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 701b3a17365SAmlan Barua } 70257625b84SAmlan Barua temp = fftw->partial_dim; 70357625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 70457625b84SAmlan Barua 70557625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 706b3a17365SAmlan Barua if (fout) { 707b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 70857625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 709b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 710b3a17365SAmlan Barua } 711b3a17365SAmlan Barua 7123c3a9c75SAmlan Barua #else 713c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 7146882372aSHong Zhang if (fin) { 7156882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7166882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7176882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7186882372aSHong Zhang } 7196882372aSHong Zhang if (fout) { 7206882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7216882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7226882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7236882372aSHong Zhang } 7243c3a9c75SAmlan Barua #endif 725b2b77a04SHong Zhang break; 726b2b77a04SHong Zhang } 727b2b77a04SHong Zhang } 72854dd5118SAmlan Barua // if (fin){ 72954dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 73054dd5118SAmlan Barua // } 73154dd5118SAmlan Barua // if (fout){ 73254dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 73354dd5118SAmlan Barua // } 734b2b77a04SHong Zhang PetscFunctionReturn(0); 735b2b77a04SHong Zhang } 736b2b77a04SHong Zhang 737b2b77a04SHong Zhang #undef __FUNCT__ 7383c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 7393c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 7403c3a9c75SAmlan Barua { 7413c3a9c75SAmlan Barua PetscErrorCode ierr; 7423c3a9c75SAmlan Barua PetscFunctionBegin; 7433c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7443c3a9c75SAmlan Barua PetscFunctionReturn(0); 7453c3a9c75SAmlan Barua } 74654efbe56SHong Zhang 747b2b77a04SHong Zhang /* 7489496c9aeSAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 7499496c9aeSAmlan Barua parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 7509496c9aeSAmlan Barua changing dimension. 7513c3a9c75SAmlan Barua Input A, x, y 7523c3a9c75SAmlan Barua A - FFTW matrix 75354dd5118SAmlan Barua x - user data 754b2b77a04SHong Zhang Options Database Keys: 755b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 756b2b77a04SHong Zhang 757b2b77a04SHong Zhang Level: intermediate 758b2b77a04SHong Zhang 759b2b77a04SHong Zhang */ 7603c3a9c75SAmlan Barua 7613c3a9c75SAmlan Barua EXTERN_C_BEGIN 7623c3a9c75SAmlan Barua #undef __FUNCT__ 7633c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 7643c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 7653c3a9c75SAmlan Barua { 7663c3a9c75SAmlan Barua PetscErrorCode ierr; 7673c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 7683c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7693c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7709496c9aeSAmlan Barua PetscInt N=fft->N; 771b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 7729496c9aeSAmlan Barua PetscInt low; 7738ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 7743c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 7759496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 7763c3a9c75SAmlan Barua VecScatter vecscat; 7773c3a9c75SAmlan Barua IS list1,list2; 7789496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7799496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 7809496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 7819496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 7829496c9aeSAmlan Barua ptrdiff_t temp; 7839496c9aeSAmlan Barua #endif 7843c3a9c75SAmlan Barua 785f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 786f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7873c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 7883c3a9c75SAmlan Barua 789e81bb599SAmlan Barua if (size==1) 790e81bb599SAmlan Barua { 7918ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7928ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7938ca4c034SAmlan Barua printf("The values of sizes are %d and %d",vsize,vsize1); 7949496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7956971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7966971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7976971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7986971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 799b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 800e81bb599SAmlan Barua } 801e81bb599SAmlan Barua 802e81bb599SAmlan Barua else{ 803e81bb599SAmlan Barua 8043c3a9c75SAmlan Barua switch (ndim){ 8053c3a9c75SAmlan Barua case 1: 80664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 80764657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 80864657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 80964657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 81064657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 81164657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81264657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81364657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 81464657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 81564657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 81664657d84SAmlan Barua #else 817e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 81864657d84SAmlan Barua #endif 8193c3a9c75SAmlan Barua break; 8203c3a9c75SAmlan Barua case 2: 821bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 822bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 823bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 824bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 825bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 826bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 829bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 830bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 831bd59e6c6SAmlan Barua #else 8323c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 8333c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 8349496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 8359496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 8369496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 8379496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 8383c3a9c75SAmlan Barua 8393c3a9c75SAmlan Barua if (dim[1]%2==0) 8403c3a9c75SAmlan Barua NM = dim[1]+2; 8413c3a9c75SAmlan Barua else 8423c3a9c75SAmlan Barua NM = dim[1]+1; 8433c3a9c75SAmlan Barua 8443c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 8453c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 8463c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 8473c3a9c75SAmlan Barua tempindx1 = i*NM + j; 8485b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8493c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 8503c3a9c75SAmlan Barua } 8513c3a9c75SAmlan Barua } 8523c3a9c75SAmlan Barua 8533c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8543c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8553c3a9c75SAmlan Barua 856f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 857f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 860b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 861b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 862b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 863b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 864bd59e6c6SAmlan Barua #endif 8659496c9aeSAmlan Barua break; 8663c3a9c75SAmlan Barua 8673c3a9c75SAmlan Barua case 3: 868bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 869bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 870bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 871bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 872bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 873bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 876bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 877bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 878bd59e6c6SAmlan Barua #else 87951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 88051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 88151d1eed7SAmlan Barua 8829496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 8839496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 88451d1eed7SAmlan Barua 88551d1eed7SAmlan Barua if (dim[2]%2==0) 88651d1eed7SAmlan Barua NM = dim[2]+2; 88751d1eed7SAmlan Barua else 88851d1eed7SAmlan Barua NM = dim[2]+1; 88951d1eed7SAmlan Barua 89051d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 89151d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 89251d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 89351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 89451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 89551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 89651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 89751d1eed7SAmlan Barua } 89851d1eed7SAmlan Barua } 89951d1eed7SAmlan Barua } 90051d1eed7SAmlan Barua 90151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 90251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 90351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 90451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 90551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 90651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 907b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 908b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 909b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 910b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 911bd59e6c6SAmlan Barua #endif 9129496c9aeSAmlan Barua break; 9133c3a9c75SAmlan Barua 9143c3a9c75SAmlan Barua default: 915bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 916bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 917bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 918bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 919bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 920bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 923bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 924bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 925bd59e6c6SAmlan Barua #else 926e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 927e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 928e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 929e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 930e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 931e5d7f247SAmlan Barua 932e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 933e5d7f247SAmlan Barua 934e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 935e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 936e5d7f247SAmlan Barua 937e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 938ba6e06d0SAmlan Barua NM = 2; 939e5d7f247SAmlan Barua else 940ba6e06d0SAmlan Barua NM = 1; 941e5d7f247SAmlan Barua 9426971673cSAmlan Barua j = low; 9436971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 9446971673cSAmlan Barua { 9456971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 9466971673cSAmlan Barua indx2[i] = j; 9476971673cSAmlan Barua if (k%dim[ndim-1]==0) 9486971673cSAmlan Barua { j+=NM;} 9496971673cSAmlan Barua j++; 9506971673cSAmlan Barua } 9516971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9526971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9536971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 9546971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9556971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9566971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 957b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 958b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 959b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 960b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 961bd59e6c6SAmlan Barua #endif 9629496c9aeSAmlan Barua break; 9633c3a9c75SAmlan Barua } 964e81bb599SAmlan Barua } 965*1abc6020SAmlan Barua return(0); 9663c3a9c75SAmlan Barua } 9673c3a9c75SAmlan Barua EXTERN_C_END 9683c3a9c75SAmlan Barua 9693c3a9c75SAmlan Barua #undef __FUNCT__ 9703c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 9713c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 9723c3a9c75SAmlan Barua { 9733c3a9c75SAmlan Barua PetscErrorCode ierr; 9743c3a9c75SAmlan Barua PetscFunctionBegin; 9753c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9763c3a9c75SAmlan Barua PetscFunctionReturn(0); 9773c3a9c75SAmlan Barua } 97854efbe56SHong Zhang 9795b3e41ffSAmlan Barua /* 9805b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 9815b3e41ffSAmlan Barua Input A, x, y 9825b3e41ffSAmlan Barua A - FFTW matrix 9835b3e41ffSAmlan Barua x - FFTW vector 9845b3e41ffSAmlan Barua y - PETSc vector that user can read 9855b3e41ffSAmlan Barua Options Database Keys: 9865b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 9875b3e41ffSAmlan Barua 9885b3e41ffSAmlan Barua Level: intermediate 9895b3e41ffSAmlan Barua 9903c3a9c75SAmlan Barua */ 9913c3a9c75SAmlan Barua 9923c3a9c75SAmlan Barua EXTERN_C_BEGIN 9933c3a9c75SAmlan Barua #undef __FUNCT__ 9945b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 9955b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 9965b3e41ffSAmlan Barua { 9975b3e41ffSAmlan Barua PetscErrorCode ierr; 9985b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 9995b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 10005b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 10019496c9aeSAmlan Barua PetscInt N=fft->N; 1002b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 10039496c9aeSAmlan Barua PetscInt low; 10049496c9aeSAmlan Barua PetscInt rank,size; 10055b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 10069496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10075b3e41ffSAmlan Barua VecScatter vecscat; 10085b3e41ffSAmlan Barua IS list1,list2; 10099496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10109496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 10119496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 10129496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 10139496c9aeSAmlan Barua ptrdiff_t temp; 10149496c9aeSAmlan Barua #endif 10159496c9aeSAmlan Barua 10165b3e41ffSAmlan Barua 10175b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 10185b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1019b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 10205b3e41ffSAmlan Barua 1021e81bb599SAmlan Barua if (size==1){ 1022b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 10236971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 10246971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10256971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10266971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028e81bb599SAmlan Barua } 1029e81bb599SAmlan Barua else{ 1030e81bb599SAmlan Barua 1031e81bb599SAmlan Barua switch (ndim){ 1032e81bb599SAmlan Barua case 1: 103364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 103464657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 10359496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 10369496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 103764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 103864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 104164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 104264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 104364657d84SAmlan Barua #else 10446a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 104564657d84SAmlan Barua #endif 10465b3e41ffSAmlan Barua break; 10475b3e41ffSAmlan Barua case 2: 1048bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1049bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1050bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1051bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1052bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1053bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058bd59e6c6SAmlan Barua #else 10595b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 10605b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 10615b3e41ffSAmlan Barua 10629496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 10639496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 10645b3e41ffSAmlan Barua 10655b3e41ffSAmlan Barua if (dim[1]%2==0) 10665b3e41ffSAmlan Barua NM = dim[1]+2; 10675b3e41ffSAmlan Barua else 10685b3e41ffSAmlan Barua NM = dim[1]+1; 10695b3e41ffSAmlan Barua 10705b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 10715b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 10725b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10735b3e41ffSAmlan Barua tempindx1 = i*NM + j; 10745b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10755b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10765b3e41ffSAmlan Barua } 10775b3e41ffSAmlan Barua } 10785b3e41ffSAmlan Barua 10795b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10805b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10815b3e41ffSAmlan Barua 10825b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10835b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10845b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10855b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1086b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1087b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1088b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1089b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1090bd59e6c6SAmlan Barua #endif 10919496c9aeSAmlan Barua break; 10925b3e41ffSAmlan Barua case 3: 1093bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1094bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1095bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1096bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1097bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1098bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1100bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1101bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1102bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1103bd59e6c6SAmlan Barua #else 1104bd59e6c6SAmlan Barua 110551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 110651d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 110751d1eed7SAmlan Barua 11089496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 11099496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 11109496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 11119496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 111251d1eed7SAmlan Barua 111351d1eed7SAmlan Barua if (dim[2]%2==0) 111451d1eed7SAmlan Barua NM = dim[2]+2; 111551d1eed7SAmlan Barua else 111651d1eed7SAmlan Barua NM = dim[2]+1; 111751d1eed7SAmlan Barua 111851d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 111951d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 112051d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 112151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 112251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 112351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 112451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 112551d1eed7SAmlan Barua } 112651d1eed7SAmlan Barua } 112751d1eed7SAmlan Barua } 112851d1eed7SAmlan Barua 112951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 113051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 113151d1eed7SAmlan Barua 113251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 113351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 113451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 113551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1136b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1137b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1138b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1139b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1140bd59e6c6SAmlan Barua #endif 11419496c9aeSAmlan Barua break; 11425b3e41ffSAmlan Barua default: 1143bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1144bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1145bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1146bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1147bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1148bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1151bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1152bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1153bd59e6c6SAmlan Barua #else 1154ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1155ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1156ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1157ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1158ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1159ba6e06d0SAmlan Barua 1160ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1161ba6e06d0SAmlan Barua 1162ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1163ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1164ba6e06d0SAmlan Barua 1165ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1166ba6e06d0SAmlan Barua NM = 2; 1167ba6e06d0SAmlan Barua else 1168ba6e06d0SAmlan Barua NM = 1; 1169ba6e06d0SAmlan Barua 1170ba6e06d0SAmlan Barua j = low; 1171ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1172ba6e06d0SAmlan Barua { 1173ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1174ba6e06d0SAmlan Barua indx2[i] = j; 1175ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1176ba6e06d0SAmlan Barua { j+=NM;} 1177ba6e06d0SAmlan Barua j++; 1178ba6e06d0SAmlan Barua } 1179ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1180ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1181ba6e06d0SAmlan Barua 1182ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1183ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1184ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1185ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1186b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1187b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1188b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1189b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1190bd59e6c6SAmlan Barua #endif 11919496c9aeSAmlan Barua break; 11925b3e41ffSAmlan Barua } 1193e81bb599SAmlan Barua } 11945b3e41ffSAmlan Barua return 0; 11955b3e41ffSAmlan Barua } 11965b3e41ffSAmlan Barua EXTERN_C_END 11975b3e41ffSAmlan Barua 11985b3e41ffSAmlan Barua EXTERN_C_BEGIN 11995b3e41ffSAmlan Barua #undef __FUNCT__ 12003c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 12013c3a9c75SAmlan Barua /* 12023c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 12033c3a9c75SAmlan Barua via the external package FFTW 12043c3a9c75SAmlan Barua Options Database Keys: 12053c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 12063c3a9c75SAmlan Barua 12073c3a9c75SAmlan Barua Level: intermediate 12083c3a9c75SAmlan Barua 12093c3a9c75SAmlan Barua */ 12103c3a9c75SAmlan Barua 1211b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1212b2b77a04SHong Zhang { 1213b2b77a04SHong Zhang PetscErrorCode ierr; 1214b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1215b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1216b2b77a04SHong Zhang Mat_FFTW *fftw; 1217b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1218b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1219b2b77a04SHong Zhang PetscBool flg; 12204f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 122111902ff2SHong Zhang PetscMPIInt size,rank; 12229496c9aeSAmlan Barua ptrdiff_t *pdim; 12239496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 12249496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12259496c9aeSAmlan Barua ptrdiff_t temp; 12268ca4c034SAmlan Barua PetscInt N1,tot_dim; 12274f48bc67SAmlan Barua #else 12284f48bc67SAmlan Barua PetscInt n1; 12299496c9aeSAmlan Barua #endif 12309496c9aeSAmlan Barua 1231b2b77a04SHong Zhang 1232b2b77a04SHong Zhang PetscFunctionBegin; 1233b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 123411902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 123554dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1236c92418ccSAmlan Barua 123711902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 123811902ff2SHong Zhang pdim[0] = dim[0]; 12398ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12408ca4c034SAmlan Barua tot_dim = 2*dim[0]; 12418ca4c034SAmlan Barua #endif 124211902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 124311902ff2SHong Zhang { 12446882372aSHong Zhang partial_dim *= dim[ctr]; 124511902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12468ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12478ca4c034SAmlan Barua if (ctr==ndim-1) 12488ca4c034SAmlan Barua tot_dim *= (dim[ctr]/2+1); 12498ca4c034SAmlan Barua else 12508ca4c034SAmlan Barua tot_dim *= dim[ctr]; 12518ca4c034SAmlan Barua #endif 12526882372aSHong Zhang } 12533c3a9c75SAmlan Barua 1254b2b77a04SHong Zhang if (size == 1) { 1255e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1256b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1257b2b77a04SHong Zhang n = N; 1258e81bb599SAmlan Barua #else 1259e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1260e81bb599SAmlan Barua n = tot_dim; 1261e81bb599SAmlan Barua #endif 1262e81bb599SAmlan Barua 1263b2b77a04SHong Zhang } else { 1264*1abc6020SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1265b2b77a04SHong Zhang switch (ndim){ 1266b2b77a04SHong Zhang case 1: 12673c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12683c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1269e5d7f247SAmlan Barua #else 12709496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 12716882372aSHong Zhang n = (PetscInt)local_n0; 12729496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 12739496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1274e5d7f247SAmlan Barua #endif 1275b2b77a04SHong Zhang break; 1276b2b77a04SHong Zhang case 2: 12775b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1278b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1279b2b77a04SHong Zhang /* 1280b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1281b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1282b2b77a04SHong Zhang */ 1283b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1284b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12855b3e41ffSAmlan Barua #else 12865b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 12875b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1288c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12895b3e41ffSAmlan Barua #endif 1290b2b77a04SHong Zhang break; 1291b2b77a04SHong Zhang case 3: 129251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 129351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 12946882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12956882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 129651d1eed7SAmlan Barua #else 129751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 129851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1299c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 130051d1eed7SAmlan Barua #endif 1301b2b77a04SHong Zhang break; 1302b2b77a04SHong Zhang default: 1303b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 130411902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 13056882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 13066882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1307b3a17365SAmlan Barua #else 1308b3a17365SAmlan Barua temp = pdim[ndim-1]; 1309b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1310b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1311b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1312b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1313b3a17365SAmlan Barua pdim[ndim-1] = temp; 1314c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1315b3a17365SAmlan Barua #endif 1316b2b77a04SHong Zhang break; 1317b2b77a04SHong Zhang } 1318b2b77a04SHong Zhang } 1319b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1320b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1321b2b77a04SHong Zhang fft->data = (void*)fftw; 1322b2b77a04SHong Zhang 1323b2b77a04SHong Zhang fft->n = n; 1324c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1325e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1326c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1327b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1328c92418ccSAmlan Barua 1329b2b77a04SHong Zhang fftw->p_forward = 0; 1330b2b77a04SHong Zhang fftw->p_backward = 0; 1331b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1332b2b77a04SHong Zhang 1333b2b77a04SHong Zhang if (size == 1){ 1334b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1335b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1336b2b77a04SHong Zhang } else { 1337b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1338b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1339b2b77a04SHong Zhang } 1340b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 13414be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1342b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13434be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 13443c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 13455b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1346b2b77a04SHong Zhang 1347b2b77a04SHong Zhang /* get runtime options */ 1348b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1349b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1350b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1351b2b77a04SHong Zhang PetscOptionsEnd(); 1352b2b77a04SHong Zhang PetscFunctionReturn(0); 1353b2b77a04SHong Zhang } 1354b2b77a04SHong Zhang EXTERN_C_END 13553c3a9c75SAmlan Barua 13563c3a9c75SAmlan Barua 13573c3a9c75SAmlan Barua 13583c3a9c75SAmlan Barua 1359