1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 3033c3a9c75SAmlan Barua 3044f7415efSAmlan Barua #undef __FUNCT__ 3054be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3064be45526SHong Zhang /*@ 307b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3084f7415efSAmlan Barua parallel layout determined by FFTW 3094f7415efSAmlan Barua 3104f7415efSAmlan Barua Collective on Mat 3114f7415efSAmlan Barua 3124f7415efSAmlan Barua Input Parameter: 3134f7415efSAmlan Barua . mat - the matrix 3144f7415efSAmlan Barua 3154f7415efSAmlan Barua Output Parameter: 3164f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3174f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3184f7415efSAmlan Barua 3194f7415efSAmlan Barua Level: advanced 3204f7415efSAmlan Barua 3214f7415efSAmlan Barua .seealso: MatCreateFFTW() 3224be45526SHong Zhang @*/ 3234be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3244be45526SHong Zhang { 3254be45526SHong Zhang PetscErrorCode ierr; 3264be45526SHong Zhang PetscFunctionBegin; 3274be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3284be45526SHong Zhang PetscFunctionReturn(0); 3294be45526SHong Zhang } 3304be45526SHong Zhang 3314f7415efSAmlan Barua EXTERN_C_BEGIN 3324be45526SHong Zhang #undef __FUNCT__ 3334be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3344be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3354f7415efSAmlan Barua { 3364f7415efSAmlan Barua PetscErrorCode ierr; 3374f7415efSAmlan Barua PetscMPIInt size,rank; 3384f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 3394f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 3404f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3419496c9aeSAmlan Barua PetscInt N=fft->N; 3424f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 3434f7415efSAmlan Barua 3444f7415efSAmlan Barua PetscFunctionBegin; 3454f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 3464f7415efSAmlan Barua PetscValidType(A,1); 3474f7415efSAmlan Barua 3484f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 3494f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 3504f7415efSAmlan Barua if (size == 1){ /* sequential case */ 3514f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 3524f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 3534f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 3544f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 3554f7415efSAmlan Barua #else 356*8ca4c034SAmlan Barua // if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 357*8ca4c034SAmlan Barua // if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 358*8ca4c034SAmlan Barua // if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 359*8ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 360*8ca4c034SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 361*8ca4c034SAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 3624f7415efSAmlan Barua #endif 3634f7415efSAmlan Barua } else { 3644f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 3659496c9aeSAmlan Barua ptrdiff_t local_n1; 3669496c9aeSAmlan Barua fftw_complex *data_fout; 3679496c9aeSAmlan Barua ptrdiff_t local_1_start; 3689496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 3699496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 3709496c9aeSAmlan Barua #else 3714f7415efSAmlan Barua double *data_finr,*data_boutr; 3729496c9aeSAmlan Barua PetscInt n1,N1,vsize; 3739496c9aeSAmlan Barua ptrdiff_t temp; 3749496c9aeSAmlan Barua #endif 3759496c9aeSAmlan Barua 3764f7415efSAmlan Barua switch (ndim){ 3774f7415efSAmlan Barua case 1: 37857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 37957625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 38057625b84SAmlan Barua #else 3819496c9aeSAmlan Barua //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 38257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 38357625b84SAmlan Barua if (fin) { 38457625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 38557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 38657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 38757625b84SAmlan Barua } 38857625b84SAmlan Barua if (fout) { 38957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 39057625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 39157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 39257625b84SAmlan Barua } 39357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 39457625b84SAmlan Barua if (bout) { 39557625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 39657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 39757625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 39857625b84SAmlan Barua } 39957625b84SAmlan Barua break; 40057625b84SAmlan Barua #endif 4014f7415efSAmlan Barua case 2: 4024f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4034f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4044f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4054f7415efSAmlan Barua if (fin) { 4064f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4074f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4084f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4094f7415efSAmlan Barua } 4104f7415efSAmlan Barua if (bout) { 4114f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4124f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4134f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4144f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4154f7415efSAmlan Barua } 416c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 41757625b84SAmlan Barua if (fout) { 41857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4199496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 42057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 42157625b84SAmlan Barua } 4224f7415efSAmlan Barua #else 4234f7415efSAmlan Barua /* Get local size */ 4244f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4254f7415efSAmlan Barua if (fin) { 4264f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4274f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4284f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4294f7415efSAmlan Barua } 4304f7415efSAmlan Barua if (fout) { 4314f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4324f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4334f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4344f7415efSAmlan Barua } 4354f7415efSAmlan Barua if (bout) { 4364f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4374f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4384f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4394f7415efSAmlan Barua } 4404f7415efSAmlan Barua #endif 4414f7415efSAmlan Barua break; 4424f7415efSAmlan Barua case 3: 4434f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4444f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4454f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 4464f7415efSAmlan Barua if (fin) { 4474f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4484f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4494f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4504f7415efSAmlan Barua } 4514f7415efSAmlan Barua if (bout) { 4524f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4534f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4544f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4554f7415efSAmlan Barua } 456c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 45757625b84SAmlan Barua if (fout) { 45857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45957625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 46057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 46157625b84SAmlan Barua } 4624f7415efSAmlan Barua #else 4630c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 4640c9b18e4SAmlan Barua if (fin) { 4650c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4660c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4670c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4680c9b18e4SAmlan Barua } 4690c9b18e4SAmlan Barua if (fout) { 4700c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4710c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4720c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4730c9b18e4SAmlan Barua } 4740c9b18e4SAmlan Barua if (bout) { 4750c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4760c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 4770c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4780c9b18e4SAmlan Barua } 4794f7415efSAmlan Barua #endif 4804f7415efSAmlan Barua break; 4814f7415efSAmlan Barua default: 4824f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4834f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 4844f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 4854f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4864f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 4874f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 4884f7415efSAmlan Barua 4894f7415efSAmlan Barua if (fin) { 4904f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4914f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4924f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4934f7415efSAmlan Barua } 4944f7415efSAmlan Barua if (bout) { 4954f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4964f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4979496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4984f7415efSAmlan Barua } 499c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 500c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 501c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 50257625b84SAmlan Barua if (fout) { 50357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 50457625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 50557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50657625b84SAmlan Barua } 5074f7415efSAmlan Barua #else 5080c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5090c9b18e4SAmlan Barua if (fin) { 5100c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5110c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5120c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5130c9b18e4SAmlan Barua } 5140c9b18e4SAmlan Barua if (fout) { 5150c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5160c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5170c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5180c9b18e4SAmlan Barua } 5190c9b18e4SAmlan Barua if (bout) { 5200c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5210c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5220c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5230c9b18e4SAmlan Barua } 5244f7415efSAmlan Barua #endif 5254f7415efSAmlan Barua break; 5264f7415efSAmlan Barua } 5274f7415efSAmlan Barua } 5284f7415efSAmlan Barua PetscFunctionReturn(0); 5294f7415efSAmlan Barua } 5304f7415efSAmlan Barua EXTERN_C_END 5314f7415efSAmlan Barua 532c92418ccSAmlan Barua #undef __FUNCT__ 533b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 534b2b77a04SHong Zhang /* 535b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 536b2b77a04SHong Zhang parallel layout determined by FFTW 537b2b77a04SHong Zhang 538b2b77a04SHong Zhang Collective on Mat 539b2b77a04SHong Zhang 540b2b77a04SHong Zhang Input Parameter: 541b2b77a04SHong Zhang . mat - the matrix 542b2b77a04SHong Zhang 543b2b77a04SHong Zhang Output Parameter: 544b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 545b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 546b2b77a04SHong Zhang 547b2b77a04SHong Zhang Level: advanced 548b2b77a04SHong Zhang 549b2b77a04SHong Zhang .seealso: MatCreateFFTW() 550b2b77a04SHong Zhang */ 551b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 552b2b77a04SHong Zhang { 553b2b77a04SHong Zhang PetscErrorCode ierr; 554b2b77a04SHong Zhang PetscMPIInt size,rank; 555b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 556b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 557c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 5589496c9aeSAmlan Barua PetscInt N=fft->N; 559e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 560b2b77a04SHong Zhang 561b2b77a04SHong Zhang PetscFunctionBegin; 562b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 563b2b77a04SHong Zhang PetscValidType(A,1); 564b2b77a04SHong Zhang 565b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 566b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 567b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 568e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 569b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 570b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 571e5d7f247SAmlan Barua #else 572e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 573e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 574e81bb599SAmlan Barua #endif 575b2b77a04SHong Zhang } else { /* mpi case */ 576b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 5776882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 578b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 5799496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 58051d1eed7SAmlan Barua double *data_finr ; 581b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 5829496c9aeSAmlan Barua PetscInt vsize,n1,N1; 5839496c9aeSAmlan Barua #endif 5849496c9aeSAmlan Barua 585c92418ccSAmlan Barua // PetscInt ctr; 586c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 587c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 588c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 58911902ff2SHong Zhang 590c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 591f76f76beSAmlan Barua // {k 592c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 593c92418ccSAmlan Barua // } 594b2b77a04SHong Zhang 595f76f76beSAmlan Barua 596f76f76beSAmlan Barua 597b2b77a04SHong Zhang switch (ndim){ 598b2b77a04SHong Zhang case 1: 5996882372aSHong Zhang /* Get local size */ 600e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 601c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6026882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6039496c9aeSAmlan Barua printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 6046882372aSHong Zhang if (fin) { 6056882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6066882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6076882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6086882372aSHong Zhang } 6096882372aSHong Zhang if (fout) { 6106882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 61157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6126882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6136882372aSHong Zhang } 614b2b77a04SHong Zhang break; 615b2b77a04SHong Zhang case 2: 6163c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6173c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6183c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6193c3a9c75SAmlan Barua if (fin) { 6203c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 62154dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6223c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 62309dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6243c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6253c3a9c75SAmlan Barua } 62657625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 62757625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 62857625b84SAmlan Barua printf("The values are %ld\n",local_n1); 6293c3a9c75SAmlan Barua if (fout) { 63009dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 63109dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6323c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6333c3a9c75SAmlan Barua } 634f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 6353c3a9c75SAmlan Barua 6363c3a9c75SAmlan Barua #else 637b2b77a04SHong Zhang /* Get local size */ 63864657d84SAmlan Barua //printf("Hope this does not come here"); 639b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 640b2b77a04SHong Zhang if (fin) { 641b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 642b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 643b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 644b2b77a04SHong Zhang } 645b2b77a04SHong Zhang if (fout) { 646b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 647b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 648b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 649b2b77a04SHong Zhang } 65064657d84SAmlan Barua //printf("Hope this does not come here"); 6513c3a9c75SAmlan Barua #endif 652b2b77a04SHong Zhang break; 653b2b77a04SHong Zhang case 3: 6546882372aSHong Zhang /* Get local size */ 6553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 65651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 65751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 65851d1eed7SAmlan Barua if (fin) { 65951d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 66051d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 66151d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 66251d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 66351d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 66451d1eed7SAmlan Barua } 66557625b84SAmlan Barua printf("The value is %ld",local_n1); 66657625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 66751d1eed7SAmlan Barua if (fout) { 66851d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 66951d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 67051d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 67151d1eed7SAmlan Barua } 67251d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 67351d1eed7SAmlan Barua 67451d1eed7SAmlan Barua 6753c3a9c75SAmlan Barua #else 6766882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 67711902ff2SHong Zhang // printf("The quantity n is %d",n); 6786882372aSHong Zhang if (fin) { 6796882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6806882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6816882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6826882372aSHong Zhang } 6836882372aSHong Zhang if (fout) { 6846882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6856882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6866882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6876882372aSHong Zhang } 6883c3a9c75SAmlan Barua #endif 689b2b77a04SHong Zhang break; 690b2b77a04SHong Zhang default: 6916882372aSHong Zhang /* Get local size */ 6923c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 693b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 694b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 695b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 696b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 697b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 698b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 699b3a17365SAmlan Barua if (fin) { 700b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 701b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 702b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 703b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 704b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 705b3a17365SAmlan Barua } 70657625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 70757625b84SAmlan Barua temp = fftw->partial_dim; 70857625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 70957625b84SAmlan Barua 71057625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 711b3a17365SAmlan Barua if (fout) { 712b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 71357625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 714b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 715b3a17365SAmlan Barua } 716b3a17365SAmlan Barua 7173c3a9c75SAmlan Barua #else 718c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 71911902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 72011902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 72111902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 72211902ff2SHong Zhang // for(i=0;i<ndim;i++) 72311902ff2SHong Zhang // { 72411902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 72511902ff2SHong Zhang // } 72611902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 72711902ff2SHong Zhang // printf("The quantity n is %d",n); 7286882372aSHong Zhang if (fin) { 7296882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7306882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7316882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7326882372aSHong Zhang } 7336882372aSHong Zhang if (fout) { 7346882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7356882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7366882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7376882372aSHong Zhang } 7383c3a9c75SAmlan Barua #endif 739b2b77a04SHong Zhang break; 740b2b77a04SHong Zhang } 741b2b77a04SHong Zhang } 74254dd5118SAmlan Barua // if (fin){ 74354dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 74454dd5118SAmlan Barua // } 74554dd5118SAmlan Barua // if (fout){ 74654dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 74754dd5118SAmlan Barua // } 748b2b77a04SHong Zhang PetscFunctionReturn(0); 749b2b77a04SHong Zhang } 750b2b77a04SHong Zhang 751b2b77a04SHong Zhang #undef __FUNCT__ 7523c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 7533c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 7543c3a9c75SAmlan Barua { 7553c3a9c75SAmlan Barua PetscErrorCode ierr; 7563c3a9c75SAmlan Barua PetscFunctionBegin; 7573c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7583c3a9c75SAmlan Barua PetscFunctionReturn(0); 7593c3a9c75SAmlan Barua } 76054efbe56SHong Zhang 761b2b77a04SHong Zhang /* 7629496c9aeSAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 7639496c9aeSAmlan Barua parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 7649496c9aeSAmlan Barua changing dimension. 7653c3a9c75SAmlan Barua Input A, x, y 7663c3a9c75SAmlan Barua A - FFTW matrix 76754dd5118SAmlan Barua x - user data 768b2b77a04SHong Zhang Options Database Keys: 769b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 770b2b77a04SHong Zhang 771b2b77a04SHong Zhang Level: intermediate 772b2b77a04SHong Zhang 773b2b77a04SHong Zhang */ 7743c3a9c75SAmlan Barua 7753c3a9c75SAmlan Barua EXTERN_C_BEGIN 7763c3a9c75SAmlan Barua #undef __FUNCT__ 7773c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 7783c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 7793c3a9c75SAmlan Barua { 7803c3a9c75SAmlan Barua PetscErrorCode ierr; 7813c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 7823c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7833c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7849496c9aeSAmlan Barua PetscInt N=fft->N; 785b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 7869496c9aeSAmlan Barua PetscInt low; 787*8ca4c034SAmlan Barua PetscInt rank,size,vsize,vsize1; 7883c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 7899496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 7903c3a9c75SAmlan Barua VecScatter vecscat; 7913c3a9c75SAmlan Barua IS list1,list2; 7929496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7939496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 7949496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 7959496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 7969496c9aeSAmlan Barua ptrdiff_t temp; 7979496c9aeSAmlan Barua #endif 7983c3a9c75SAmlan Barua 799f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 800f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8013c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 8023c3a9c75SAmlan Barua 803e81bb599SAmlan Barua if (size==1) 804e81bb599SAmlan Barua { 8057536937bSAmlan Barua /* switch (ndim){ 806e81bb599SAmlan Barua case 1: 807e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 808e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 809e81bb599SAmlan Barua { 810e81bb599SAmlan Barua indx1[i] = i; 811e81bb599SAmlan Barua } 812e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 813e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 814e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 815e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 817b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 818b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 819e81bb599SAmlan Barua break; 820e81bb599SAmlan Barua 821e81bb599SAmlan Barua case 2: 822e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 823e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 824e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 825e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 826e81bb599SAmlan Barua } 827e81bb599SAmlan Barua } 828e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 829e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 830e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 833b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 834b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 835e81bb599SAmlan Barua break; 836e81bb599SAmlan Barua case 3: 837e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 838e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 839e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 840e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 841e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 842e81bb599SAmlan Barua } 843e81bb599SAmlan Barua } 844e81bb599SAmlan Barua } 845e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 846e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 847e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 849e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 850b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 851b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 852e81bb599SAmlan Barua break; 853e81bb599SAmlan Barua default: 8547536937bSAmlan Barua */ 855*8ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 856*8ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 857*8ca4c034SAmlan Barua printf("The values of sizes are %d and %d",vsize,vsize1); 8589496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 8596971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8606971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8616971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8626971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 863b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 8647536937bSAmlan Barua // break; 8657536937bSAmlan Barua // } 866e81bb599SAmlan Barua } 867e81bb599SAmlan Barua 868e81bb599SAmlan Barua else{ 869e81bb599SAmlan Barua 8703c3a9c75SAmlan Barua switch (ndim){ 8713c3a9c75SAmlan Barua case 1: 87264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87364657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 87464657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 87564657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 87664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 87764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 88064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 88164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 88264657d84SAmlan Barua #else 883e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 88464657d84SAmlan Barua #endif 8853c3a9c75SAmlan Barua break; 8863c3a9c75SAmlan Barua case 2: 887bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 888bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 889bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 890bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 891bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 892bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 895bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 896bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 897bd59e6c6SAmlan Barua #else 8983c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 8993c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9009496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9019496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9029496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9039496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9043c3a9c75SAmlan Barua 9053c3a9c75SAmlan Barua if (dim[1]%2==0) 9063c3a9c75SAmlan Barua NM = dim[1]+2; 9073c3a9c75SAmlan Barua else 9083c3a9c75SAmlan Barua NM = dim[1]+1; 9093c3a9c75SAmlan Barua 9103c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9113c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9123c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9133c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9145b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9153c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9163c3a9c75SAmlan Barua } 9173c3a9c75SAmlan Barua } 9183c3a9c75SAmlan Barua 9193c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9203c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9213c3a9c75SAmlan Barua 922f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 923f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 926b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 927b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 928b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 929b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 930bd59e6c6SAmlan Barua #endif 9319496c9aeSAmlan Barua break; 9323c3a9c75SAmlan Barua 9333c3a9c75SAmlan Barua case 3: 934bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 935bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 936bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 937bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 938bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 939bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 940bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 941bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 944bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 945bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 946bd59e6c6SAmlan Barua #else 94751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 94851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 94951d1eed7SAmlan Barua 9509496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9519496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 9539496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 95451d1eed7SAmlan Barua 95551d1eed7SAmlan Barua if (dim[2]%2==0) 95651d1eed7SAmlan Barua NM = dim[2]+2; 95751d1eed7SAmlan Barua else 95851d1eed7SAmlan Barua NM = dim[2]+1; 95951d1eed7SAmlan Barua 96051d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 96151d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 96251d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 96351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 96451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 96551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 96651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 96751d1eed7SAmlan Barua } 96851d1eed7SAmlan Barua } 96951d1eed7SAmlan Barua } 97051d1eed7SAmlan Barua 97151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 97251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 97351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 97451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 978b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 979b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 980b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua #endif 9829496c9aeSAmlan Barua break; 9833c3a9c75SAmlan Barua 9843c3a9c75SAmlan Barua default: 985bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 986bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 987bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 988bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 989bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 994bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua #else 996e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 997e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 998e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 999e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1000e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1001e5d7f247SAmlan Barua 1002e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1003e5d7f247SAmlan Barua 1004e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1005e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1006e5d7f247SAmlan Barua 1007e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1008ba6e06d0SAmlan Barua NM = 2; 1009e5d7f247SAmlan Barua else 1010ba6e06d0SAmlan Barua NM = 1; 1011e5d7f247SAmlan Barua 10126971673cSAmlan Barua j = low; 10136971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 10146971673cSAmlan Barua { 10156971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 10166971673cSAmlan Barua indx2[i] = j; 10176971673cSAmlan Barua if (k%dim[ndim-1]==0) 10186971673cSAmlan Barua { j+=NM;} 10196971673cSAmlan Barua j++; 10206971673cSAmlan Barua } 10216971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10226971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10236971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 10246971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10256971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10266971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1029b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1030b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1031bd59e6c6SAmlan Barua #endif 10329496c9aeSAmlan Barua break; 10333c3a9c75SAmlan Barua } 1034e81bb599SAmlan Barua } 10353c3a9c75SAmlan Barua 10363c3a9c75SAmlan Barua return 0; 10373c3a9c75SAmlan Barua } 10383c3a9c75SAmlan Barua EXTERN_C_END 10393c3a9c75SAmlan Barua 10403c3a9c75SAmlan Barua #undef __FUNCT__ 10413c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 10423c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 10433c3a9c75SAmlan Barua { 10443c3a9c75SAmlan Barua PetscErrorCode ierr; 10453c3a9c75SAmlan Barua PetscFunctionBegin; 10463c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 10473c3a9c75SAmlan Barua PetscFunctionReturn(0); 10483c3a9c75SAmlan Barua } 104954efbe56SHong Zhang 10505b3e41ffSAmlan Barua /* 10515b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 10525b3e41ffSAmlan Barua Input A, x, y 10535b3e41ffSAmlan Barua A - FFTW matrix 10545b3e41ffSAmlan Barua x - FFTW vector 10555b3e41ffSAmlan Barua y - PETSc vector that user can read 10565b3e41ffSAmlan Barua Options Database Keys: 10575b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10585b3e41ffSAmlan Barua 10595b3e41ffSAmlan Barua Level: intermediate 10605b3e41ffSAmlan Barua 10613c3a9c75SAmlan Barua */ 10623c3a9c75SAmlan Barua 10633c3a9c75SAmlan Barua EXTERN_C_BEGIN 10643c3a9c75SAmlan Barua #undef __FUNCT__ 10655b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 10665b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 10675b3e41ffSAmlan Barua { 10685b3e41ffSAmlan Barua PetscErrorCode ierr; 10695b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 10705b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 10715b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 10729496c9aeSAmlan Barua PetscInt N=fft->N; 1073b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 10749496c9aeSAmlan Barua PetscInt low; 10759496c9aeSAmlan Barua PetscInt rank,size; 10765b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 10779496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 10785b3e41ffSAmlan Barua VecScatter vecscat; 10795b3e41ffSAmlan Barua IS list1,list2; 10809496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10819496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 10829496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 10839496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 10849496c9aeSAmlan Barua ptrdiff_t temp; 10859496c9aeSAmlan Barua #endif 10869496c9aeSAmlan Barua 10875b3e41ffSAmlan Barua 10885b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 10895b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1090b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 10915b3e41ffSAmlan Barua 1092e81bb599SAmlan Barua if (size==1){ 10937536937bSAmlan Barua /* 10945b3e41ffSAmlan Barua switch (ndim){ 10955b3e41ffSAmlan Barua case 1: 1096e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1097e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1098e81bb599SAmlan Barua { 1099e81bb599SAmlan Barua indx1[i] = i; 1100e81bb599SAmlan Barua } 1101e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1102e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1103e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1104e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1105e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1106b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1107b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1108e81bb599SAmlan Barua break; 1109e81bb599SAmlan Barua 1110e81bb599SAmlan Barua case 2: 1111e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1112e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1113e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1114e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1115e81bb599SAmlan Barua } 1116e81bb599SAmlan Barua } 1117e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1118e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1119e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1122b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1123b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1124e81bb599SAmlan Barua break; 1125e81bb599SAmlan Barua case 3: 1126e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1127e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1128e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1129e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1130e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1131e81bb599SAmlan Barua } 1132e81bb599SAmlan Barua } 1133e81bb599SAmlan Barua } 1134e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1135e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1136e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1139b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1140b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1141e81bb599SAmlan Barua break; 1142e81bb599SAmlan Barua default: 11437536937bSAmlan Barua */ 1144b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 11456971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 11466971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 11476971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11486971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11496971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1150b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 11517536937bSAmlan Barua // break; 11527536937bSAmlan Barua // } 1153e81bb599SAmlan Barua } 1154e81bb599SAmlan Barua else{ 1155e81bb599SAmlan Barua 1156e81bb599SAmlan Barua switch (ndim){ 1157e81bb599SAmlan Barua case 1: 115864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 115964657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 11609496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 11619496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 116264657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 116364657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 116464657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 116564657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116664657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116764657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 116864657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 116964657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 117064657d84SAmlan Barua #else 11716a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 117264657d84SAmlan Barua #endif 11735b3e41ffSAmlan Barua break; 11745b3e41ffSAmlan Barua case 2: 1175bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1176bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1177bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1178bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1179bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1180bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1181bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1183bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1184bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1185bd59e6c6SAmlan Barua #else 11865b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 11875b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 11885b3e41ffSAmlan Barua 11899496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 11909496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 11915b3e41ffSAmlan Barua 11925b3e41ffSAmlan Barua if (dim[1]%2==0) 11935b3e41ffSAmlan Barua NM = dim[1]+2; 11945b3e41ffSAmlan Barua else 11955b3e41ffSAmlan Barua NM = dim[1]+1; 11965b3e41ffSAmlan Barua 11975b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 11985b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 11995b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12005b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12015b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12025b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 12035b3e41ffSAmlan Barua } 12045b3e41ffSAmlan Barua } 12055b3e41ffSAmlan Barua 12065b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 12075b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 12085b3e41ffSAmlan Barua 12095b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 12105b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12115b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12125b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1213b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1214b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1215b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1216b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1217bd59e6c6SAmlan Barua #endif 12189496c9aeSAmlan Barua break; 12195b3e41ffSAmlan Barua case 3: 1220bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1221bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1222bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1223bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1224bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1225bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1227bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1228bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1229bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1230bd59e6c6SAmlan Barua #else 1231bd59e6c6SAmlan Barua 123251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 123351d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 123451d1eed7SAmlan Barua 12359496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12369496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12379496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 12389496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 123951d1eed7SAmlan Barua 124051d1eed7SAmlan Barua if (dim[2]%2==0) 124151d1eed7SAmlan Barua NM = dim[2]+2; 124251d1eed7SAmlan Barua else 124351d1eed7SAmlan Barua NM = dim[2]+1; 124451d1eed7SAmlan Barua 124551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 124651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 124751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 124851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 124951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 125051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 125151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 125251d1eed7SAmlan Barua } 125351d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 125451d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 125551d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 125651d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 125751d1eed7SAmlan Barua } 125851d1eed7SAmlan Barua } 125951d1eed7SAmlan Barua 126051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 126151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 126251d1eed7SAmlan Barua 126351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 126451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 126551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 126651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1267b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1268b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1269b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1270b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1271bd59e6c6SAmlan Barua #endif 12729496c9aeSAmlan Barua break; 12735b3e41ffSAmlan Barua default: 1274bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1275bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1276bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1277bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1278bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1279bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1280bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1281bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1282bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1283bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1284bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1285bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1286bd59e6c6SAmlan Barua #else 1287ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1288ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1289ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1290ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1291ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1292ba6e06d0SAmlan Barua 1293ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1294ba6e06d0SAmlan Barua 1295ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1296ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1297ba6e06d0SAmlan Barua 1298ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1299ba6e06d0SAmlan Barua NM = 2; 1300ba6e06d0SAmlan Barua else 1301ba6e06d0SAmlan Barua NM = 1; 1302ba6e06d0SAmlan Barua 1303ba6e06d0SAmlan Barua j = low; 1304ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1305ba6e06d0SAmlan Barua { 1306ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1307ba6e06d0SAmlan Barua indx2[i] = j; 1308ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1309ba6e06d0SAmlan Barua { j+=NM;} 1310ba6e06d0SAmlan Barua j++; 1311ba6e06d0SAmlan Barua } 1312ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1313ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1314ba6e06d0SAmlan Barua 1315ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1316ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1318ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1319b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1320b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1321b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1322b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1323bd59e6c6SAmlan Barua #endif 13249496c9aeSAmlan Barua break; 13255b3e41ffSAmlan Barua } 1326e81bb599SAmlan Barua } 13275b3e41ffSAmlan Barua return 0; 13285b3e41ffSAmlan Barua } 13295b3e41ffSAmlan Barua EXTERN_C_END 13305b3e41ffSAmlan Barua 13315b3e41ffSAmlan Barua EXTERN_C_BEGIN 13325b3e41ffSAmlan Barua #undef __FUNCT__ 13333c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 13343c3a9c75SAmlan Barua /* 13353c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 13363c3a9c75SAmlan Barua via the external package FFTW 13373c3a9c75SAmlan Barua Options Database Keys: 13383c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 13393c3a9c75SAmlan Barua 13403c3a9c75SAmlan Barua Level: intermediate 13413c3a9c75SAmlan Barua 13423c3a9c75SAmlan Barua */ 13433c3a9c75SAmlan Barua 1344b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1345b2b77a04SHong Zhang { 1346b2b77a04SHong Zhang PetscErrorCode ierr; 1347b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1348b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1349b2b77a04SHong Zhang Mat_FFTW *fftw; 1350b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1351b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1352b2b77a04SHong Zhang PetscBool flg; 13534f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 135411902ff2SHong Zhang PetscMPIInt size,rank; 13559496c9aeSAmlan Barua ptrdiff_t *pdim; 13569496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 13579496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 13589496c9aeSAmlan Barua ptrdiff_t temp; 1359*8ca4c034SAmlan Barua PetscInt N1,tot_dim; 13604f48bc67SAmlan Barua #else 13614f48bc67SAmlan Barua PetscInt n1; 13629496c9aeSAmlan Barua #endif 13639496c9aeSAmlan Barua 1364b2b77a04SHong Zhang 1365b2b77a04SHong Zhang PetscFunctionBegin; 1366b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 136711902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 136854dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1369c92418ccSAmlan Barua 137011902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137111902ff2SHong Zhang pdim[0] = dim[0]; 1372*8ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1373*8ca4c034SAmlan Barua tot_dim = 2*dim[0]; 1374*8ca4c034SAmlan Barua #endif 137511902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 137611902ff2SHong Zhang { 13776882372aSHong Zhang partial_dim *= dim[ctr]; 137811902ff2SHong Zhang pdim[ctr] = dim[ctr]; 1379*8ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1380*8ca4c034SAmlan Barua if (ctr==ndim-1) 1381*8ca4c034SAmlan Barua tot_dim *= (dim[ctr]/2+1); 1382*8ca4c034SAmlan Barua else 1383*8ca4c034SAmlan Barua tot_dim *= dim[ctr]; 1384*8ca4c034SAmlan Barua #endif 13856882372aSHong Zhang } 13863c3a9c75SAmlan Barua 1387b2b77a04SHong Zhang if (size == 1) { 1388e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1389b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1390b2b77a04SHong Zhang n = N; 1391e81bb599SAmlan Barua #else 1392*8ca4c034SAmlan Barua //printf("The value of N is %d\n",N); 1393*8ca4c034SAmlan Barua //tot_dim = 2*N*(dim[ndim-1]/2+1); 1394*8ca4c034SAmlan Barua //tot_dim = ((long int)tot_dim)/dim[ndim-1]; 1395*8ca4c034SAmlan Barua //printf("The value of is %ld and %d\n",tot_dim,dim[ndim-1]); 1396e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1397e81bb599SAmlan Barua n = tot_dim; 1398e81bb599SAmlan Barua #endif 1399e81bb599SAmlan Barua 1400b2b77a04SHong Zhang } else { 14019496c9aeSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1402b2b77a04SHong Zhang switch (ndim){ 1403b2b77a04SHong Zhang case 1: 14043c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14053c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1406e5d7f247SAmlan Barua #else 14079496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 14086882372aSHong Zhang n = (PetscInt)local_n0; 14099496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 14109496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1411e5d7f247SAmlan Barua #endif 1412b2b77a04SHong Zhang break; 1413b2b77a04SHong Zhang case 2: 14145b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1415b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1416b2b77a04SHong Zhang /* 1417b2b77a04SHong Zhang PetscMPIInt rank; 1418b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1419b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1420b2b77a04SHong Zhang */ 1421b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1422b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 14235b3e41ffSAmlan Barua #else 14245b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 14255b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1426c8a8a4f0SAmlan Barua // n1 = 2*(PetscInt)local_n1*(dim[0]); 1427c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1428c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 14295b3e41ffSAmlan Barua #endif 1430b2b77a04SHong Zhang break; 1431b2b77a04SHong Zhang case 3: 143251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 143351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 14346882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 14356882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 143651d1eed7SAmlan Barua #else 143751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 143851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1439c8a8a4f0SAmlan Barua // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1440c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1441c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 144251d1eed7SAmlan Barua #endif 1443b2b77a04SHong Zhang break; 1444b2b77a04SHong Zhang default: 1445b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 144611902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 14476882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 14486882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1449b3a17365SAmlan Barua #else 1450b3a17365SAmlan Barua temp = pdim[ndim-1]; 1451b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1452b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1453b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1454b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1455b3a17365SAmlan Barua pdim[ndim-1] = temp; 1456c8a8a4f0SAmlan Barua // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1457c8a8a4f0SAmlan Barua // n1 = 2*local_n1*temp; 1458c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1459c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1460b3a17365SAmlan Barua #endif 1461b2b77a04SHong Zhang break; 1462b2b77a04SHong Zhang } 1463b2b77a04SHong Zhang } 1464b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1465b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1466b2b77a04SHong Zhang fft->data = (void*)fftw; 1467b2b77a04SHong Zhang 1468b2b77a04SHong Zhang fft->n = n; 1469c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1470e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1471c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1472b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1473c92418ccSAmlan Barua 1474b2b77a04SHong Zhang fftw->p_forward = 0; 1475b2b77a04SHong Zhang fftw->p_backward = 0; 1476b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1477b2b77a04SHong Zhang 1478b2b77a04SHong Zhang if (size == 1){ 1479b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1480b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1481b2b77a04SHong Zhang } else { 1482b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1483b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1484b2b77a04SHong Zhang } 1485b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 14866a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 14874be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1488b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 14894be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 14903c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 14915b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1492b2b77a04SHong Zhang 1493b2b77a04SHong Zhang /* get runtime options */ 1494b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1495b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1496b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1497b2b77a04SHong Zhang PetscOptionsEnd(); 1498b2b77a04SHong Zhang PetscFunctionReturn(0); 1499b2b77a04SHong Zhang } 1500b2b77a04SHong Zhang EXTERN_C_END 15013c3a9c75SAmlan Barua 15023c3a9c75SAmlan Barua 15033c3a9c75SAmlan Barua 15043c3a9c75SAmlan Barua 1505