1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC" 305c92418ccSAmlan Barua /* 3064be45526SHong Zhang - Get Vectors(s) compatible with matrix, i.e. with the 307c92418ccSAmlan Barua parallel layout determined by FFTW-1D 308c92418ccSAmlan Barua 309c92418ccSAmlan Barua */ 3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311c92418ccSAmlan Barua { 312c92418ccSAmlan Barua PetscErrorCode ierr; 313c92418ccSAmlan Barua PetscMPIInt size,rank; 314c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 315c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 316c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317c92418ccSAmlan Barua PetscInt N=fft->N; 318c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 319c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 321c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 323ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 324c92418ccSAmlan Barua 325c92418ccSAmlan Barua PetscFunctionBegin; 326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 327c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328c92418ccSAmlan Barua #endif 329c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331c92418ccSAmlan Barua if (size == 1){ 332c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333c92418ccSAmlan Barua } 334c92418ccSAmlan Barua else { 335c92418ccSAmlan Barua if (ndim>1){ 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337c92418ccSAmlan Barua else { 338c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340c92418ccSAmlan Barua if (fin) { 341c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout) { 346c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349c92418ccSAmlan Barua } 350c92418ccSAmlan Barua if (bout) { 351c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354c92418ccSAmlan Barua } 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fin){ 357c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (fout){ 360c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bout){ 363c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364c92418ccSAmlan Barua } 365c92418ccSAmlan Barua PetscFunctionReturn(0); 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua } 3703c3a9c75SAmlan Barua 3714f7415efSAmlan Barua #undef __FUNCT__ 3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3734be45526SHong Zhang /*@ 374b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3754f7415efSAmlan Barua parallel layout determined by FFTW 3764f7415efSAmlan Barua 3774f7415efSAmlan Barua Collective on Mat 3784f7415efSAmlan Barua 3794f7415efSAmlan Barua Input Parameter: 3804f7415efSAmlan Barua . mat - the matrix 3814f7415efSAmlan Barua 3824f7415efSAmlan Barua Output Parameter: 3834f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3844f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3854f7415efSAmlan Barua 3864f7415efSAmlan Barua Level: advanced 3874f7415efSAmlan Barua 3884f7415efSAmlan Barua .seealso: MatCreateFFTW() 3894be45526SHong Zhang @*/ 3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3914be45526SHong Zhang { 3924be45526SHong Zhang PetscErrorCode ierr; 3934be45526SHong Zhang PetscFunctionBegin; 3944be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3954be45526SHong Zhang PetscFunctionReturn(0); 3964be45526SHong Zhang } 3974be45526SHong Zhang 3984f7415efSAmlan Barua EXTERN_C_BEGIN 3994be45526SHong Zhang #undef __FUNCT__ 4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 4014be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4024f7415efSAmlan Barua { 4034f7415efSAmlan Barua PetscErrorCode ierr; 4044f7415efSAmlan Barua PetscMPIInt size,rank; 4054f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 4064f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4074f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4089496c9aeSAmlan Barua PetscInt N=fft->N; 4094f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 4104f7415efSAmlan Barua 4114f7415efSAmlan Barua PetscFunctionBegin; 4124f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4134f7415efSAmlan Barua PetscValidType(A,1); 4144f7415efSAmlan Barua 4154f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4164f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4174f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4184f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4194f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4204f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4214f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4224f7415efSAmlan Barua #else 4239496c9aeSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 4244f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4254f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 4264f7415efSAmlan Barua #endif 4274f7415efSAmlan Barua } else { 4284f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4299496c9aeSAmlan Barua ptrdiff_t local_n1; 4309496c9aeSAmlan Barua fftw_complex *data_fout; 4319496c9aeSAmlan Barua ptrdiff_t local_1_start; 4329496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4339496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4349496c9aeSAmlan Barua #else 4354f7415efSAmlan Barua double *data_finr,*data_boutr; 4369496c9aeSAmlan Barua PetscInt n1,N1,vsize; 4379496c9aeSAmlan Barua ptrdiff_t temp; 4389496c9aeSAmlan Barua #endif 4399496c9aeSAmlan Barua 4404f7415efSAmlan Barua switch (ndim){ 4414f7415efSAmlan Barua case 1: 44257625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 44357625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 44457625b84SAmlan Barua #else 4459496c9aeSAmlan Barua //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 44657625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44757625b84SAmlan Barua if (fin) { 44857625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44957625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 45057625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 45157625b84SAmlan Barua } 45257625b84SAmlan Barua if (fout) { 45357625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45457625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45657625b84SAmlan Barua } 45757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45857625b84SAmlan Barua if (bout) { 45957625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 46057625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 46157625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 46257625b84SAmlan Barua } 46357625b84SAmlan Barua break; 46457625b84SAmlan Barua #endif 4654f7415efSAmlan Barua case 2: 4664f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4674f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4684f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4694f7415efSAmlan Barua if (fin) { 4704f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4714f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4724f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4734f7415efSAmlan Barua } 4744f7415efSAmlan Barua if (bout) { 4754f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4764f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4774f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4784f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4794f7415efSAmlan Barua } 480c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 48157625b84SAmlan Barua if (fout) { 48257625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4839496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48557625b84SAmlan Barua } 4864f7415efSAmlan Barua #else 4874f7415efSAmlan Barua /* Get local size */ 4884f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4894f7415efSAmlan Barua if (fin) { 4904f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4914f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4924f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4934f7415efSAmlan Barua } 4944f7415efSAmlan Barua if (fout) { 4954f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4964f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4974f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4984f7415efSAmlan Barua } 4994f7415efSAmlan Barua if (bout) { 5004f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5014f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5024f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5034f7415efSAmlan Barua } 5044f7415efSAmlan Barua #endif 5054f7415efSAmlan Barua break; 5064f7415efSAmlan Barua case 3: 5074f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5084f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5094f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5104f7415efSAmlan Barua if (fin) { 5114f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5124f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5134f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5144f7415efSAmlan Barua } 5154f7415efSAmlan Barua if (bout) { 5164f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5174f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5184f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5194f7415efSAmlan Barua } 520c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 52157625b84SAmlan Barua if (fout) { 52257625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52357625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52557625b84SAmlan Barua } 5264f7415efSAmlan Barua #else 5270c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5280c9b18e4SAmlan Barua if (fin) { 5290c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5300c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5310c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5320c9b18e4SAmlan Barua } 5330c9b18e4SAmlan Barua if (fout) { 5340c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5350c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5360c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5370c9b18e4SAmlan Barua } 5380c9b18e4SAmlan Barua if (bout) { 5390c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5400c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5410c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5420c9b18e4SAmlan Barua } 5434f7415efSAmlan Barua #endif 5444f7415efSAmlan Barua break; 5454f7415efSAmlan Barua default: 5464f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5474f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5484f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5504f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5514f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5524f7415efSAmlan Barua 5534f7415efSAmlan Barua if (fin) { 5544f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5554f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5564f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5574f7415efSAmlan Barua } 5584f7415efSAmlan Barua if (bout) { 5594f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5604f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5619496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5624f7415efSAmlan Barua } 563c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 564c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 565c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 56657625b84SAmlan Barua if (fout) { 56757625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 56857625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 57057625b84SAmlan Barua } 5714f7415efSAmlan Barua #else 5720c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5730c9b18e4SAmlan Barua if (fin) { 5740c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5750c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5760c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5770c9b18e4SAmlan Barua } 5780c9b18e4SAmlan Barua if (fout) { 5790c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5800c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5810c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5820c9b18e4SAmlan Barua } 5830c9b18e4SAmlan Barua if (bout) { 5840c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5850c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5860c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5870c9b18e4SAmlan Barua } 5884f7415efSAmlan Barua #endif 5894f7415efSAmlan Barua break; 5904f7415efSAmlan Barua } 5914f7415efSAmlan Barua } 5924f7415efSAmlan Barua PetscFunctionReturn(0); 5934f7415efSAmlan Barua } 5944f7415efSAmlan Barua EXTERN_C_END 5954f7415efSAmlan Barua 596c92418ccSAmlan Barua #undef __FUNCT__ 597b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 598b2b77a04SHong Zhang /* 599b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 600b2b77a04SHong Zhang parallel layout determined by FFTW 601b2b77a04SHong Zhang 602b2b77a04SHong Zhang Collective on Mat 603b2b77a04SHong Zhang 604b2b77a04SHong Zhang Input Parameter: 605b2b77a04SHong Zhang . mat - the matrix 606b2b77a04SHong Zhang 607b2b77a04SHong Zhang Output Parameter: 608b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 609b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 610b2b77a04SHong Zhang 611b2b77a04SHong Zhang Level: advanced 612b2b77a04SHong Zhang 613b2b77a04SHong Zhang .seealso: MatCreateFFTW() 614b2b77a04SHong Zhang */ 615b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 616b2b77a04SHong Zhang { 617b2b77a04SHong Zhang PetscErrorCode ierr; 618b2b77a04SHong Zhang PetscMPIInt size,rank; 619b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 620b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 621c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6229496c9aeSAmlan Barua PetscInt N=fft->N; 623e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 624b2b77a04SHong Zhang 625b2b77a04SHong Zhang PetscFunctionBegin; 626b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 627b2b77a04SHong Zhang PetscValidType(A,1); 628b2b77a04SHong Zhang 629b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 630b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 631b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 632e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 633b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 634b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 635e5d7f247SAmlan Barua #else 636e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 637e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 638e81bb599SAmlan Barua #endif 639b2b77a04SHong Zhang } else { /* mpi case */ 640b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6416882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 642b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 6439496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 64451d1eed7SAmlan Barua double *data_finr ; 645b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 6469496c9aeSAmlan Barua PetscInt vsize,n1,N1; 6479496c9aeSAmlan Barua #endif 6489496c9aeSAmlan Barua 649c92418ccSAmlan Barua // PetscInt ctr; 650c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 651c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 652c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 65311902ff2SHong Zhang 654c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 655f76f76beSAmlan Barua // {k 656c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 657c92418ccSAmlan Barua // } 658b2b77a04SHong Zhang 659f76f76beSAmlan Barua 660f76f76beSAmlan Barua 661b2b77a04SHong Zhang switch (ndim){ 662b2b77a04SHong Zhang case 1: 6636882372aSHong Zhang /* Get local size */ 664e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 665c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6666882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6679496c9aeSAmlan Barua printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 6686882372aSHong Zhang if (fin) { 6696882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6706882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6716882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6726882372aSHong Zhang } 6736882372aSHong Zhang if (fout) { 6746882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 67557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6766882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6776882372aSHong Zhang } 678b2b77a04SHong Zhang break; 679b2b77a04SHong Zhang case 2: 6803c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6813c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6823c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6833c3a9c75SAmlan Barua if (fin) { 6843c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 68554dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6863c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 68709dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6883c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6893c3a9c75SAmlan Barua } 69057625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 69157625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 69257625b84SAmlan Barua printf("The values are %ld\n",local_n1); 6933c3a9c75SAmlan Barua if (fout) { 69409dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 69509dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6963c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6973c3a9c75SAmlan Barua } 698f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 6993c3a9c75SAmlan Barua 7003c3a9c75SAmlan Barua #else 701b2b77a04SHong Zhang /* Get local size */ 70264657d84SAmlan Barua //printf("Hope this does not come here"); 703b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 704b2b77a04SHong Zhang if (fin) { 705b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 706b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 707b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 708b2b77a04SHong Zhang } 709b2b77a04SHong Zhang if (fout) { 710b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 711b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 712b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 713b2b77a04SHong Zhang } 71464657d84SAmlan Barua //printf("Hope this does not come here"); 7153c3a9c75SAmlan Barua #endif 716b2b77a04SHong Zhang break; 717b2b77a04SHong Zhang case 3: 7186882372aSHong Zhang /* Get local size */ 7193c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 72051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 72151d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 72251d1eed7SAmlan Barua if (fin) { 72351d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 72451d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 72551d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 72651d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 72751d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 72851d1eed7SAmlan Barua } 72957625b84SAmlan Barua printf("The value is %ld",local_n1); 73057625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 73151d1eed7SAmlan Barua if (fout) { 73251d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 73351d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 73451d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 73551d1eed7SAmlan Barua } 73651d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 73751d1eed7SAmlan Barua 73851d1eed7SAmlan Barua 7393c3a9c75SAmlan Barua #else 7406882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 74111902ff2SHong Zhang // printf("The quantity n is %d",n); 7426882372aSHong Zhang if (fin) { 7436882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7446882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7456882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7466882372aSHong Zhang } 7476882372aSHong Zhang if (fout) { 7486882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7496882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7506882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7516882372aSHong Zhang } 7523c3a9c75SAmlan Barua #endif 753b2b77a04SHong Zhang break; 754b2b77a04SHong Zhang default: 7556882372aSHong Zhang /* Get local size */ 7563c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 757b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 758b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 759b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 760b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 761b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 762b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 763b3a17365SAmlan Barua if (fin) { 764b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 765b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 766b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 767b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 768b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 769b3a17365SAmlan Barua } 77057625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 77157625b84SAmlan Barua temp = fftw->partial_dim; 77257625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 77357625b84SAmlan Barua 77457625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 775b3a17365SAmlan Barua if (fout) { 776b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 77757625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 778b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 779b3a17365SAmlan Barua } 780b3a17365SAmlan Barua 7813c3a9c75SAmlan Barua #else 782c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78311902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 78411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 78511902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 78611902ff2SHong Zhang // for(i=0;i<ndim;i++) 78711902ff2SHong Zhang // { 78811902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 78911902ff2SHong Zhang // } 79011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 79111902ff2SHong Zhang // printf("The quantity n is %d",n); 7926882372aSHong Zhang if (fin) { 7936882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7946882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7956882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7966882372aSHong Zhang } 7976882372aSHong Zhang if (fout) { 7986882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7996882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 8006882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 8016882372aSHong Zhang } 8023c3a9c75SAmlan Barua #endif 803b2b77a04SHong Zhang break; 804b2b77a04SHong Zhang } 805b2b77a04SHong Zhang } 80654dd5118SAmlan Barua // if (fin){ 80754dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 80854dd5118SAmlan Barua // } 80954dd5118SAmlan Barua // if (fout){ 81054dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 81154dd5118SAmlan Barua // } 812b2b77a04SHong Zhang PetscFunctionReturn(0); 813b2b77a04SHong Zhang } 814b2b77a04SHong Zhang 815b2b77a04SHong Zhang #undef __FUNCT__ 8163c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 8173c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 8183c3a9c75SAmlan Barua { 8193c3a9c75SAmlan Barua PetscErrorCode ierr; 8203c3a9c75SAmlan Barua PetscFunctionBegin; 8213c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8223c3a9c75SAmlan Barua PetscFunctionReturn(0); 8233c3a9c75SAmlan Barua } 82454efbe56SHong Zhang 825b2b77a04SHong Zhang /* 8269496c9aeSAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 8279496c9aeSAmlan Barua parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 8289496c9aeSAmlan Barua changing dimension. 8293c3a9c75SAmlan Barua Input A, x, y 8303c3a9c75SAmlan Barua A - FFTW matrix 83154dd5118SAmlan Barua x - user data 832b2b77a04SHong Zhang Options Database Keys: 833b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 834b2b77a04SHong Zhang 835b2b77a04SHong Zhang Level: intermediate 836b2b77a04SHong Zhang 837b2b77a04SHong Zhang */ 8383c3a9c75SAmlan Barua 8393c3a9c75SAmlan Barua EXTERN_C_BEGIN 8403c3a9c75SAmlan Barua #undef __FUNCT__ 8413c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 8423c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8433c3a9c75SAmlan Barua { 8443c3a9c75SAmlan Barua PetscErrorCode ierr; 8453c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8463c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8473c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8489496c9aeSAmlan Barua PetscInt N=fft->N; 849b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 8509496c9aeSAmlan Barua PetscInt low; 8519496c9aeSAmlan Barua PetscInt rank,size; 8523c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8539496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8543c3a9c75SAmlan Barua VecScatter vecscat; 8553c3a9c75SAmlan Barua IS list1,list2; 8569496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8579496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8589496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8599496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8609496c9aeSAmlan Barua ptrdiff_t temp; 8619496c9aeSAmlan Barua #endif 8623c3a9c75SAmlan Barua 863f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 864f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8653c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 8663c3a9c75SAmlan Barua 867e81bb599SAmlan Barua if (size==1) 868e81bb599SAmlan Barua { 8697536937bSAmlan Barua /* switch (ndim){ 870e81bb599SAmlan Barua case 1: 871e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 872e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 873e81bb599SAmlan Barua { 874e81bb599SAmlan Barua indx1[i] = i; 875e81bb599SAmlan Barua } 876e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 877e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 878e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 881b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 882b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 883e81bb599SAmlan Barua break; 884e81bb599SAmlan Barua 885e81bb599SAmlan Barua case 2: 886e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 887e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 888e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 889e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 890e81bb599SAmlan Barua } 891e81bb599SAmlan Barua } 892e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 893e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 894e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 897b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 898b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 899e81bb599SAmlan Barua break; 900e81bb599SAmlan Barua case 3: 901e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 902e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 903e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 904e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 905e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 906e81bb599SAmlan Barua } 907e81bb599SAmlan Barua } 908e81bb599SAmlan Barua } 909e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 910e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 911e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 914b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 915b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 916e81bb599SAmlan Barua break; 917e81bb599SAmlan Barua default: 9187536937bSAmlan Barua */ 9199496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 9206971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9216971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9226971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9236971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 924b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9257536937bSAmlan Barua // break; 9267536937bSAmlan Barua // } 927e81bb599SAmlan Barua } 928e81bb599SAmlan Barua 929e81bb599SAmlan Barua else{ 930e81bb599SAmlan Barua 9313c3a9c75SAmlan Barua switch (ndim){ 9323c3a9c75SAmlan Barua case 1: 93364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 93464657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 93564657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 93664657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 93764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 93864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 94164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 94264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 94364657d84SAmlan Barua #else 944e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 94564657d84SAmlan Barua #endif 9463c3a9c75SAmlan Barua break; 9473c3a9c75SAmlan Barua case 2: 948bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 949bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 950bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 951bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 952bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 953bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 956bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 957bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 958bd59e6c6SAmlan Barua #else 9593c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9603c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9619496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9629496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9639496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9649496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9653c3a9c75SAmlan Barua 9663c3a9c75SAmlan Barua if (dim[1]%2==0) 9673c3a9c75SAmlan Barua NM = dim[1]+2; 9683c3a9c75SAmlan Barua else 9693c3a9c75SAmlan Barua NM = dim[1]+1; 9703c3a9c75SAmlan Barua 9713c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9723c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9733c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9743c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9755b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9763c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9773c3a9c75SAmlan Barua } 9783c3a9c75SAmlan Barua } 9793c3a9c75SAmlan Barua 9803c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9813c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9823c3a9c75SAmlan Barua 983f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 984f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 987b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 988b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 989b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 990b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 991bd59e6c6SAmlan Barua #endif 9929496c9aeSAmlan Barua break; 9933c3a9c75SAmlan Barua 9943c3a9c75SAmlan Barua case 3: 995bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 996bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 997bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 998bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 999bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1000bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1001bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1002bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1005bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1006bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1007bd59e6c6SAmlan Barua #else 100851d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 100951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 101051d1eed7SAmlan Barua 10119496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 10129496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 10139496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10149496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 101551d1eed7SAmlan Barua 101651d1eed7SAmlan Barua if (dim[2]%2==0) 101751d1eed7SAmlan Barua NM = dim[2]+2; 101851d1eed7SAmlan Barua else 101951d1eed7SAmlan Barua NM = dim[2]+1; 102051d1eed7SAmlan Barua 102151d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 102251d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 102351d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 102451d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 102551d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 102651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 102751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 102851d1eed7SAmlan Barua } 102951d1eed7SAmlan Barua } 103051d1eed7SAmlan Barua } 103151d1eed7SAmlan Barua 103251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 103351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 103451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 103551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1038b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1039b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1040b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1041b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1042bd59e6c6SAmlan Barua #endif 10439496c9aeSAmlan Barua break; 10443c3a9c75SAmlan Barua 10453c3a9c75SAmlan Barua default: 1046bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1047bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1048bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1049bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1050bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1051bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1054bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1055bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1056bd59e6c6SAmlan Barua #else 1057e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1058e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1059e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1060e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1061e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1062e5d7f247SAmlan Barua 1063e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1064e5d7f247SAmlan Barua 1065e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1066e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1067e5d7f247SAmlan Barua 1068e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1069ba6e06d0SAmlan Barua NM = 2; 1070e5d7f247SAmlan Barua else 1071ba6e06d0SAmlan Barua NM = 1; 1072e5d7f247SAmlan Barua 10736971673cSAmlan Barua j = low; 10746971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 10756971673cSAmlan Barua { 10766971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 10776971673cSAmlan Barua indx2[i] = j; 10786971673cSAmlan Barua if (k%dim[ndim-1]==0) 10796971673cSAmlan Barua { j+=NM;} 10806971673cSAmlan Barua j++; 10816971673cSAmlan Barua } 10826971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10836971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10846971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 10856971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10866971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10876971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1088b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1089b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1090b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1091b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1092bd59e6c6SAmlan Barua #endif 10939496c9aeSAmlan Barua break; 10943c3a9c75SAmlan Barua } 1095e81bb599SAmlan Barua } 10963c3a9c75SAmlan Barua 10973c3a9c75SAmlan Barua return 0; 10983c3a9c75SAmlan Barua } 10993c3a9c75SAmlan Barua EXTERN_C_END 11003c3a9c75SAmlan Barua 11013c3a9c75SAmlan Barua #undef __FUNCT__ 11023c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 11033c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 11043c3a9c75SAmlan Barua { 11053c3a9c75SAmlan Barua PetscErrorCode ierr; 11063c3a9c75SAmlan Barua PetscFunctionBegin; 11073c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 11083c3a9c75SAmlan Barua PetscFunctionReturn(0); 11093c3a9c75SAmlan Barua } 111054efbe56SHong Zhang 11115b3e41ffSAmlan Barua /* 11125b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 11135b3e41ffSAmlan Barua Input A, x, y 11145b3e41ffSAmlan Barua A - FFTW matrix 11155b3e41ffSAmlan Barua x - FFTW vector 11165b3e41ffSAmlan Barua y - PETSc vector that user can read 11175b3e41ffSAmlan Barua Options Database Keys: 11185b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11195b3e41ffSAmlan Barua 11205b3e41ffSAmlan Barua Level: intermediate 11215b3e41ffSAmlan Barua 11223c3a9c75SAmlan Barua */ 11233c3a9c75SAmlan Barua 11243c3a9c75SAmlan Barua EXTERN_C_BEGIN 11253c3a9c75SAmlan Barua #undef __FUNCT__ 11265b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 11275b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 11285b3e41ffSAmlan Barua { 11295b3e41ffSAmlan Barua PetscErrorCode ierr; 11305b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 11315b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11325b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 11339496c9aeSAmlan Barua PetscInt N=fft->N; 1134b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 11359496c9aeSAmlan Barua PetscInt low; 11369496c9aeSAmlan Barua PetscInt rank,size; 11375b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 11389496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11395b3e41ffSAmlan Barua VecScatter vecscat; 11405b3e41ffSAmlan Barua IS list1,list2; 11419496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11429496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 11439496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 11449496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 11459496c9aeSAmlan Barua ptrdiff_t temp; 11469496c9aeSAmlan Barua #endif 11479496c9aeSAmlan Barua 11485b3e41ffSAmlan Barua 11495b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11505b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1151b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 11525b3e41ffSAmlan Barua 1153e81bb599SAmlan Barua if (size==1){ 11547536937bSAmlan Barua /* 11555b3e41ffSAmlan Barua switch (ndim){ 11565b3e41ffSAmlan Barua case 1: 1157e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1158e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1159e81bb599SAmlan Barua { 1160e81bb599SAmlan Barua indx1[i] = i; 1161e81bb599SAmlan Barua } 1162e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1163e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1164e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1167b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1168b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1169e81bb599SAmlan Barua break; 1170e81bb599SAmlan Barua 1171e81bb599SAmlan Barua case 2: 1172e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1173e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1174e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1175e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1176e81bb599SAmlan Barua } 1177e81bb599SAmlan Barua } 1178e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1179e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1180e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1181e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1183b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1184b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1185e81bb599SAmlan Barua break; 1186e81bb599SAmlan Barua case 3: 1187e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1188e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1189e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1190e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1191e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1192e81bb599SAmlan Barua } 1193e81bb599SAmlan Barua } 1194e81bb599SAmlan Barua } 1195e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1196e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1197e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1198e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1200b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1201b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1202e81bb599SAmlan Barua break; 1203e81bb599SAmlan Barua default: 12047536937bSAmlan Barua */ 1205b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 12066971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 12076971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 12086971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12096971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12106971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1211b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 12127536937bSAmlan Barua // break; 12137536937bSAmlan Barua // } 1214e81bb599SAmlan Barua } 1215e81bb599SAmlan Barua else{ 1216e81bb599SAmlan Barua 1217e81bb599SAmlan Barua switch (ndim){ 1218e81bb599SAmlan Barua case 1: 121964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 122064657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 12219496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 12229496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 122364657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 122464657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 122564657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 122664657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122764657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122864657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 122964657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 123064657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 123164657d84SAmlan Barua #else 12326a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 123364657d84SAmlan Barua #endif 12345b3e41ffSAmlan Barua break; 12355b3e41ffSAmlan Barua case 2: 1236bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1237bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1238bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1239bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1240bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1241bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1244bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1245bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1246bd59e6c6SAmlan Barua #else 12475b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12485b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12495b3e41ffSAmlan Barua 12509496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 12519496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 12525b3e41ffSAmlan Barua 12535b3e41ffSAmlan Barua if (dim[1]%2==0) 12545b3e41ffSAmlan Barua NM = dim[1]+2; 12555b3e41ffSAmlan Barua else 12565b3e41ffSAmlan Barua NM = dim[1]+1; 12575b3e41ffSAmlan Barua 12585b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12595b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12605b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12615b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12625b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12635b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 12645b3e41ffSAmlan Barua } 12655b3e41ffSAmlan Barua } 12665b3e41ffSAmlan Barua 12675b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 12685b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 12695b3e41ffSAmlan Barua 12705b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 12715b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12725b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12735b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1274b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1275b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1276b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1277b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1278bd59e6c6SAmlan Barua #endif 12799496c9aeSAmlan Barua break; 12805b3e41ffSAmlan Barua case 3: 1281bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1282bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1283bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1284bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1285bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1286bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1287bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1288bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1289bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1290bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1291bd59e6c6SAmlan Barua #else 1292bd59e6c6SAmlan Barua 129351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 129451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 129551d1eed7SAmlan Barua 12969496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12979496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12989496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 12999496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 130051d1eed7SAmlan Barua 130151d1eed7SAmlan Barua if (dim[2]%2==0) 130251d1eed7SAmlan Barua NM = dim[2]+2; 130351d1eed7SAmlan Barua else 130451d1eed7SAmlan Barua NM = dim[2]+1; 130551d1eed7SAmlan Barua 130651d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 130751d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 130851d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 130951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 131051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 131151d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 131251d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 131351d1eed7SAmlan Barua } 131451d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 131551d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 131651d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 131751d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 131851d1eed7SAmlan Barua } 131951d1eed7SAmlan Barua } 132051d1eed7SAmlan Barua 132151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 132251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 132351d1eed7SAmlan Barua 132451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 132551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 132651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 132751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1328b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1329b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1330b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1331b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1332bd59e6c6SAmlan Barua #endif 13339496c9aeSAmlan Barua break; 13345b3e41ffSAmlan Barua default: 1335bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1336bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1337bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1338bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1339bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1340bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1341bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1342bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1343bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1344bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1345bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1346bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1347bd59e6c6SAmlan Barua #else 1348ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1349ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1350ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1351ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1352ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1353ba6e06d0SAmlan Barua 1354ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1355ba6e06d0SAmlan Barua 1356ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1357ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1358ba6e06d0SAmlan Barua 1359ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1360ba6e06d0SAmlan Barua NM = 2; 1361ba6e06d0SAmlan Barua else 1362ba6e06d0SAmlan Barua NM = 1; 1363ba6e06d0SAmlan Barua 1364ba6e06d0SAmlan Barua j = low; 1365ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1366ba6e06d0SAmlan Barua { 1367ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1368ba6e06d0SAmlan Barua indx2[i] = j; 1369ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1370ba6e06d0SAmlan Barua { j+=NM;} 1371ba6e06d0SAmlan Barua j++; 1372ba6e06d0SAmlan Barua } 1373ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1374ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1375ba6e06d0SAmlan Barua 1376ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1377ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1378ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1380b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1381b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1382b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1383b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1384bd59e6c6SAmlan Barua #endif 13859496c9aeSAmlan Barua break; 13865b3e41ffSAmlan Barua } 1387e81bb599SAmlan Barua } 13885b3e41ffSAmlan Barua return 0; 13895b3e41ffSAmlan Barua } 13905b3e41ffSAmlan Barua EXTERN_C_END 13915b3e41ffSAmlan Barua 13925b3e41ffSAmlan Barua EXTERN_C_BEGIN 13935b3e41ffSAmlan Barua #undef __FUNCT__ 13943c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 13953c3a9c75SAmlan Barua /* 13963c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 13973c3a9c75SAmlan Barua via the external package FFTW 13983c3a9c75SAmlan Barua Options Database Keys: 13993c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 14003c3a9c75SAmlan Barua 14013c3a9c75SAmlan Barua Level: intermediate 14023c3a9c75SAmlan Barua 14033c3a9c75SAmlan Barua */ 14043c3a9c75SAmlan Barua 1405b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1406b2b77a04SHong Zhang { 1407b2b77a04SHong Zhang PetscErrorCode ierr; 1408b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1409b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1410b2b77a04SHong Zhang Mat_FFTW *fftw; 1411b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1412b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1413b2b77a04SHong Zhang PetscBool flg; 1414*4f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 141511902ff2SHong Zhang PetscMPIInt size,rank; 14169496c9aeSAmlan Barua ptrdiff_t *pdim; 14179496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 14189496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14199496c9aeSAmlan Barua ptrdiff_t temp; 14209496c9aeSAmlan Barua PetscInt N1; 1421*4f48bc67SAmlan Barua #else 1422*4f48bc67SAmlan Barua PetscInt n1; 14239496c9aeSAmlan Barua #endif 14249496c9aeSAmlan Barua 1425b2b77a04SHong Zhang 1426b2b77a04SHong Zhang PetscFunctionBegin; 1427b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 142811902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 142954dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1430c92418ccSAmlan Barua 143111902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 143211902ff2SHong Zhang pdim[0] = dim[0]; 143311902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 143411902ff2SHong Zhang { 14356882372aSHong Zhang partial_dim *= dim[ctr]; 143611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14376882372aSHong Zhang } 14383c3a9c75SAmlan Barua 1439b2b77a04SHong Zhang if (size == 1) { 1440e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1441b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1442b2b77a04SHong Zhang n = N; 1443e81bb599SAmlan Barua #else 14449496c9aeSAmlan Barua PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1445e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1446e81bb599SAmlan Barua n = tot_dim; 1447e81bb599SAmlan Barua #endif 1448e81bb599SAmlan Barua 1449b2b77a04SHong Zhang } else { 14509496c9aeSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1451b2b77a04SHong Zhang switch (ndim){ 1452b2b77a04SHong Zhang case 1: 14533c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14543c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1455e5d7f247SAmlan Barua #else 14569496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 14576882372aSHong Zhang n = (PetscInt)local_n0; 14589496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 14599496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1460e5d7f247SAmlan Barua #endif 1461b2b77a04SHong Zhang break; 1462b2b77a04SHong Zhang case 2: 14635b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1464b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1465b2b77a04SHong Zhang /* 1466b2b77a04SHong Zhang PetscMPIInt rank; 1467b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1468b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1469b2b77a04SHong Zhang */ 1470b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1471b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 14725b3e41ffSAmlan Barua #else 14735b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 14745b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1475c8a8a4f0SAmlan Barua // n1 = 2*(PetscInt)local_n1*(dim[0]); 1476c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1477c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 14785b3e41ffSAmlan Barua #endif 1479b2b77a04SHong Zhang break; 1480b2b77a04SHong Zhang case 3: 148151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 14836882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 14846882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 148551d1eed7SAmlan Barua #else 148651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 148751d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1488c8a8a4f0SAmlan Barua // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1489c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1490c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 149151d1eed7SAmlan Barua #endif 1492b2b77a04SHong Zhang break; 1493b2b77a04SHong Zhang default: 1494b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 149511902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 14966882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 14976882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1498b3a17365SAmlan Barua #else 1499b3a17365SAmlan Barua temp = pdim[ndim-1]; 1500b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1501b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1502b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1503b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1504b3a17365SAmlan Barua pdim[ndim-1] = temp; 1505c8a8a4f0SAmlan Barua // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1506c8a8a4f0SAmlan Barua // n1 = 2*local_n1*temp; 1507c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1508c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1509b3a17365SAmlan Barua #endif 1510b2b77a04SHong Zhang break; 1511b2b77a04SHong Zhang } 1512b2b77a04SHong Zhang } 1513b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1514b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1515b2b77a04SHong Zhang fft->data = (void*)fftw; 1516b2b77a04SHong Zhang 1517b2b77a04SHong Zhang fft->n = n; 1518c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1519e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1520c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1521b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1522c92418ccSAmlan Barua 1523b2b77a04SHong Zhang fftw->p_forward = 0; 1524b2b77a04SHong Zhang fftw->p_backward = 0; 1525b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1526b2b77a04SHong Zhang 1527b2b77a04SHong Zhang if (size == 1){ 1528b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1529b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1530b2b77a04SHong Zhang } else { 1531b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1532b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1533b2b77a04SHong Zhang } 1534b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15356a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 15364be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1537b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 15384be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 15393c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15405b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1541b2b77a04SHong Zhang 1542b2b77a04SHong Zhang /* get runtime options */ 1543b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1544b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1545b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1546b2b77a04SHong Zhang PetscOptionsEnd(); 1547b2b77a04SHong Zhang PetscFunctionReturn(0); 1548b2b77a04SHong Zhang } 1549b2b77a04SHong Zhang EXTERN_C_END 15503c3a9c75SAmlan Barua 15513c3a9c75SAmlan Barua 15523c3a9c75SAmlan Barua 15533c3a9c75SAmlan Barua 1554