1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 3043c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 305c92418ccSAmlan Barua /* 306c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 307c92418ccSAmlan Barua parallel layout determined by FFTW-1D 308c92418ccSAmlan Barua 309c92418ccSAmlan Barua */ 3106a506ed8SAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 311c92418ccSAmlan Barua { 312c92418ccSAmlan Barua PetscErrorCode ierr; 313c92418ccSAmlan Barua PetscMPIInt size,rank; 314c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 315c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 316c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317c92418ccSAmlan Barua PetscInt N=fft->N; 318c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 319c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 321c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 323ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 324c92418ccSAmlan Barua 325c92418ccSAmlan Barua PetscFunctionBegin; 326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 327c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328c92418ccSAmlan Barua #endif 329c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331c92418ccSAmlan Barua if (size == 1){ 332c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333c92418ccSAmlan Barua } 334c92418ccSAmlan Barua else { 335c92418ccSAmlan Barua if (ndim>1){ 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337c92418ccSAmlan Barua else { 338c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340c92418ccSAmlan Barua if (fin) { 341c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout) { 346c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349c92418ccSAmlan Barua } 350c92418ccSAmlan Barua if (bout) { 351c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354c92418ccSAmlan Barua } 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fin){ 357c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (fout){ 360c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bout){ 363c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364c92418ccSAmlan Barua } 365c92418ccSAmlan Barua PetscFunctionReturn(0); 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua } 3703c3a9c75SAmlan Barua 371c92418ccSAmlan Barua #undef __FUNCT__ 372b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 373b2b77a04SHong Zhang /* 374b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 375b2b77a04SHong Zhang parallel layout determined by FFTW 376b2b77a04SHong Zhang 377b2b77a04SHong Zhang Collective on Mat 378b2b77a04SHong Zhang 379b2b77a04SHong Zhang Input Parameter: 380b2b77a04SHong Zhang . mat - the matrix 381b2b77a04SHong Zhang 382b2b77a04SHong Zhang Output Parameter: 383b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 384b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 385b2b77a04SHong Zhang 386b2b77a04SHong Zhang Level: advanced 387b2b77a04SHong Zhang 388b2b77a04SHong Zhang .seealso: MatCreateFFTW() 389b2b77a04SHong Zhang */ 390b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 391b2b77a04SHong Zhang { 392b2b77a04SHong Zhang PetscErrorCode ierr; 393b2b77a04SHong Zhang PetscMPIInt size,rank; 394b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 395b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 396c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 3973c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 398e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 399b2b77a04SHong Zhang 400b2b77a04SHong Zhang PetscFunctionBegin; 401b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 402b2b77a04SHong Zhang PetscValidType(A,1); 403b2b77a04SHong Zhang 404b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 405b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 406b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 407e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 408b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 409b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 410e5d7f247SAmlan Barua #else 411e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 412e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 413e81bb599SAmlan Barua #endif 414b2b77a04SHong Zhang } else { /* mpi case */ 415b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4166882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 417b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 41851d1eed7SAmlan Barua double *data_finr ; 419b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 420c92418ccSAmlan Barua // PetscInt ctr; 421c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 422c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 423c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 42411902ff2SHong Zhang 425c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 426f76f76beSAmlan Barua // {k 427c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 428c92418ccSAmlan Barua // } 429b2b77a04SHong Zhang 430f76f76beSAmlan Barua 431f76f76beSAmlan Barua 432b2b77a04SHong Zhang switch (ndim){ 433b2b77a04SHong Zhang case 1: 4346882372aSHong Zhang /* Get local size */ 435e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 436c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4376882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4386882372aSHong Zhang if (fin) { 4396882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4406882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4416882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4426882372aSHong Zhang } 4436882372aSHong Zhang if (fout) { 4446882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4456882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4466882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4476882372aSHong Zhang } 448b2b77a04SHong Zhang break; 449b2b77a04SHong Zhang case 2: 4503c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4513c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4523c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4533c3a9c75SAmlan Barua if (fin) { 4543c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 45554dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4563c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 45709dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4583c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4593c3a9c75SAmlan Barua } 4603c3a9c75SAmlan Barua if (fout) { 46109dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 46209dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4633c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4643c3a9c75SAmlan Barua } 465f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 4663c3a9c75SAmlan Barua 4673c3a9c75SAmlan Barua #else 468b2b77a04SHong Zhang /* Get local size */ 46909dd8a53SAmlan Barua printf("Hope this does not come here"); 470b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 471b2b77a04SHong Zhang if (fin) { 472b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 474b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475b2b77a04SHong Zhang } 476b2b77a04SHong Zhang if (fout) { 477b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480b2b77a04SHong Zhang } 4813c3a9c75SAmlan Barua printf("Hope this does not come here"); 4823c3a9c75SAmlan Barua #endif 483b2b77a04SHong Zhang break; 484b2b77a04SHong Zhang case 3: 4856882372aSHong Zhang /* Get local size */ 4863c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 48751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 48851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 48951d1eed7SAmlan Barua if (fin) { 49051d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 49151d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 49251d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 49351d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 49451d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 49551d1eed7SAmlan Barua } 49651d1eed7SAmlan Barua if (fout) { 49751d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 49851d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 49951d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50051d1eed7SAmlan Barua } 50151d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 50251d1eed7SAmlan Barua 50351d1eed7SAmlan Barua 5043c3a9c75SAmlan Barua #else 5056882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 50611902ff2SHong Zhang // printf("The quantity n is %d",n); 5076882372aSHong Zhang if (fin) { 5086882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5096882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5106882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5116882372aSHong Zhang } 5126882372aSHong Zhang if (fout) { 5136882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5146882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5156882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5166882372aSHong Zhang } 5173c3a9c75SAmlan Barua #endif 518b2b77a04SHong Zhang break; 519b2b77a04SHong Zhang default: 5206882372aSHong Zhang /* Get local size */ 5213c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 522b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 523b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 524b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 525b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 526b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 527b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 528b3a17365SAmlan Barua if (fin) { 529b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 530b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 531b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 532b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 533b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534b3a17365SAmlan Barua } 535b3a17365SAmlan Barua if (fout) { 536b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 538b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539b3a17365SAmlan Barua } 540b3a17365SAmlan Barua 5413c3a9c75SAmlan Barua #else 542c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 54311902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 54411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 54511902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 54611902ff2SHong Zhang // for(i=0;i<ndim;i++) 54711902ff2SHong Zhang // { 54811902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 54911902ff2SHong Zhang // } 55011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 55111902ff2SHong Zhang // printf("The quantity n is %d",n); 5526882372aSHong Zhang if (fin) { 5536882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5546882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5556882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5566882372aSHong Zhang } 5576882372aSHong Zhang if (fout) { 5586882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5596882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5606882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5616882372aSHong Zhang } 5623c3a9c75SAmlan Barua #endif 563b2b77a04SHong Zhang break; 564b2b77a04SHong Zhang } 565b2b77a04SHong Zhang } 56654dd5118SAmlan Barua // if (fin){ 56754dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 56854dd5118SAmlan Barua // } 56954dd5118SAmlan Barua // if (fout){ 57054dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 57154dd5118SAmlan Barua // } 572b2b77a04SHong Zhang PetscFunctionReturn(0); 573b2b77a04SHong Zhang } 574b2b77a04SHong Zhang 575b2b77a04SHong Zhang #undef __FUNCT__ 5763c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 5773c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 5783c3a9c75SAmlan Barua { 5793c3a9c75SAmlan Barua PetscErrorCode ierr; 5803c3a9c75SAmlan Barua PetscFunctionBegin; 5813c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 5823c3a9c75SAmlan Barua PetscFunctionReturn(0); 5833c3a9c75SAmlan Barua } 58454efbe56SHong Zhang 585b2b77a04SHong Zhang /* 5863c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 5873c3a9c75SAmlan Barua Input A, x, y 5883c3a9c75SAmlan Barua A - FFTW matrix 58954dd5118SAmlan Barua x - user data 590b2b77a04SHong Zhang Options Database Keys: 591b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 592b2b77a04SHong Zhang 593b2b77a04SHong Zhang Level: intermediate 594b2b77a04SHong Zhang 595b2b77a04SHong Zhang */ 5963c3a9c75SAmlan Barua 5973c3a9c75SAmlan Barua EXTERN_C_BEGIN 5983c3a9c75SAmlan Barua #undef __FUNCT__ 5993c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 6003c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 6013c3a9c75SAmlan Barua { 6023c3a9c75SAmlan Barua PetscErrorCode ierr; 6033c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6043c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6053c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6063c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 607b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 608b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 609e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 6103c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 611e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 6123c3a9c75SAmlan Barua VecScatter vecscat; 6133c3a9c75SAmlan Barua IS list1,list2; 6143c3a9c75SAmlan Barua 615f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 616f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6173c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 618f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 6193c3a9c75SAmlan Barua 620e81bb599SAmlan Barua if (size==1) 621e81bb599SAmlan Barua { 6227536937bSAmlan Barua /* switch (ndim){ 623e81bb599SAmlan Barua case 1: 624e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 625e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 626e81bb599SAmlan Barua { 627e81bb599SAmlan Barua indx1[i] = i; 628e81bb599SAmlan Barua } 629e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 630e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 631e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 632e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 633e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 634b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 635b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 636e81bb599SAmlan Barua break; 637e81bb599SAmlan Barua 638e81bb599SAmlan Barua case 2: 639e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 640e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 641e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 642e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 643e81bb599SAmlan Barua } 644e81bb599SAmlan Barua } 645e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 646e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 647e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 648e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 649e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 650b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 651b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 652e81bb599SAmlan Barua break; 653e81bb599SAmlan Barua case 3: 654e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 655e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 656e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 657e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 658e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 659e81bb599SAmlan Barua } 660e81bb599SAmlan Barua } 661e81bb599SAmlan Barua } 662e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 663e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 664e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 665e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 666e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 667b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 668b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 669e81bb599SAmlan Barua break; 670e81bb599SAmlan Barua default: 6717536937bSAmlan Barua */ 6726971673cSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 6736971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6746971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6756971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6766971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 677b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6786971673cSAmlan Barua //ierr = ISDestroy(list1);CHKERRQ(ierr); 6797536937bSAmlan Barua // break; 6807536937bSAmlan Barua // } 681e81bb599SAmlan Barua } 682e81bb599SAmlan Barua 683e81bb599SAmlan Barua else{ 684e81bb599SAmlan Barua 6853c3a9c75SAmlan Barua switch (ndim){ 6863c3a9c75SAmlan Barua case 1: 687e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 6883c3a9c75SAmlan Barua break; 6893c3a9c75SAmlan Barua case 2: 690*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 691*bd59e6c6SAmlan Barua //PetscInt my_value; 692*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 693*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 694*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 695*bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 696*bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 697*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 698*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 699*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 700*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 701*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 702*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 703*bd59e6c6SAmlan Barua break; 704*bd59e6c6SAmlan Barua #else 7053c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 7063c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7073c3a9c75SAmlan Barua 7085b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 7095b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 7105b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 7113c3a9c75SAmlan Barua 7123c3a9c75SAmlan Barua if (dim[1]%2==0) 7133c3a9c75SAmlan Barua NM = dim[1]+2; 7143c3a9c75SAmlan Barua else 7153c3a9c75SAmlan Barua NM = dim[1]+1; 7163c3a9c75SAmlan Barua 7173c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 7183c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 7193c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7203c3a9c75SAmlan Barua tempindx1 = i*NM + j; 7215b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7223c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7235b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 7245b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 7255b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 7265b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 7273c3a9c75SAmlan Barua } 7283c3a9c75SAmlan Barua } 7293c3a9c75SAmlan Barua 7303c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7313c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7323c3a9c75SAmlan Barua 733f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 734f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 735f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 736f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 737b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 738b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 739b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 740b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 7413c3a9c75SAmlan Barua break; 742*bd59e6c6SAmlan Barua #endif 7433c3a9c75SAmlan Barua 7443c3a9c75SAmlan Barua case 3: 745*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 746*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 747*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 748*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 749*bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 750*bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 751*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 752*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 753*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 754*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 755*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 756*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 757*bd59e6c6SAmlan Barua break; 758*bd59e6c6SAmlan Barua #else 75951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 76051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 76151d1eed7SAmlan Barua 76251d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 76351d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 76451d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 76551d1eed7SAmlan Barua 76651d1eed7SAmlan Barua if (dim[2]%2==0) 76751d1eed7SAmlan Barua NM = dim[2]+2; 76851d1eed7SAmlan Barua else 76951d1eed7SAmlan Barua NM = dim[2]+1; 77051d1eed7SAmlan Barua 77151d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 77251d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 77351d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 77451d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 77551d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 77651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 77751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 77851d1eed7SAmlan Barua } 77951d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 78051d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 78151d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 78251d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 78351d1eed7SAmlan Barua } 78451d1eed7SAmlan Barua } 78551d1eed7SAmlan Barua 78651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 78751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 78851d1eed7SAmlan Barua 78951d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 79051d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79151d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79251d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 794b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 795b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 796b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 7973c3a9c75SAmlan Barua break; 798*bd59e6c6SAmlan Barua #endif 7993c3a9c75SAmlan Barua 8003c3a9c75SAmlan Barua default: 801*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 802*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 803*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 804*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 805*bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 806*bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 807*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 808*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 809*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 810*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 811*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 812*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 813*bd59e6c6SAmlan Barua 814*bd59e6c6SAmlan Barua 815*bd59e6c6SAmlan Barua #else 816e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 817e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 818e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 819e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 820e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 821e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 822e5d7f247SAmlan Barua 823e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 824e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 825e5d7f247SAmlan Barua 826e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 827e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 828e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 829e5d7f247SAmlan Barua 830e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 831ba6e06d0SAmlan Barua NM = 2; 832e5d7f247SAmlan Barua else 833ba6e06d0SAmlan Barua NM = 1; 834e5d7f247SAmlan Barua 8356971673cSAmlan Barua j = low; 8366971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 8376971673cSAmlan Barua { 8386971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8396971673cSAmlan Barua indx2[i] = j; 840ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 8416971673cSAmlan Barua if (k%dim[ndim-1]==0) 8426971673cSAmlan Barua { j+=NM;} 8436971673cSAmlan Barua j++; 8446971673cSAmlan Barua } 8456971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8466971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8476971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8486971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8496971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8506971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 851b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 852b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 853b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 854b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 8553c3a9c75SAmlan Barua break; 856*bd59e6c6SAmlan Barua #endif 8573c3a9c75SAmlan Barua } 858e81bb599SAmlan Barua } 8593c3a9c75SAmlan Barua 8603c3a9c75SAmlan Barua return 0; 8613c3a9c75SAmlan Barua } 8623c3a9c75SAmlan Barua EXTERN_C_END 8633c3a9c75SAmlan Barua 8643c3a9c75SAmlan Barua #undef __FUNCT__ 8653c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 8663c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 8673c3a9c75SAmlan Barua { 8683c3a9c75SAmlan Barua PetscErrorCode ierr; 8693c3a9c75SAmlan Barua PetscFunctionBegin; 8703c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8713c3a9c75SAmlan Barua PetscFunctionReturn(0); 8723c3a9c75SAmlan Barua } 87354efbe56SHong Zhang 8745b3e41ffSAmlan Barua /* 8755b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 8765b3e41ffSAmlan Barua Input A, x, y 8775b3e41ffSAmlan Barua A - FFTW matrix 8785b3e41ffSAmlan Barua x - FFTW vector 8795b3e41ffSAmlan Barua y - PETSc vector that user can read 8805b3e41ffSAmlan Barua Options Database Keys: 8815b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 8825b3e41ffSAmlan Barua 8835b3e41ffSAmlan Barua Level: intermediate 8845b3e41ffSAmlan Barua 8853c3a9c75SAmlan Barua */ 8863c3a9c75SAmlan Barua 8873c3a9c75SAmlan Barua EXTERN_C_BEGIN 8883c3a9c75SAmlan Barua #undef __FUNCT__ 8895b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 8905b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8915b3e41ffSAmlan Barua { 8925b3e41ffSAmlan Barua PetscErrorCode ierr; 8935b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8945b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8955b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8965b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 897b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 898b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 899ba6e06d0SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 9005b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 901ba6e06d0SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 9025b3e41ffSAmlan Barua VecScatter vecscat; 9035b3e41ffSAmlan Barua IS list1,list2; 9045b3e41ffSAmlan Barua 9055b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9065b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 907cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 9085b3e41ffSAmlan Barua 909e81bb599SAmlan Barua if (size==1){ 9107536937bSAmlan Barua /* 9115b3e41ffSAmlan Barua switch (ndim){ 9125b3e41ffSAmlan Barua case 1: 913e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 914e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 915e81bb599SAmlan Barua { 916e81bb599SAmlan Barua indx1[i] = i; 917e81bb599SAmlan Barua } 918e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 919e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 920e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 923b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 924b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 925e81bb599SAmlan Barua break; 926e81bb599SAmlan Barua 927e81bb599SAmlan Barua case 2: 928e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 929e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 930e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 931e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 932e81bb599SAmlan Barua } 933e81bb599SAmlan Barua } 934e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 935e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 936e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 938e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 939b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 940b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 941e81bb599SAmlan Barua break; 942e81bb599SAmlan Barua case 3: 943e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 944e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 945e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 946e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 947e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 948e81bb599SAmlan Barua } 949e81bb599SAmlan Barua } 950e81bb599SAmlan Barua } 951e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 952e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 953e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 956b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 957b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 958e81bb599SAmlan Barua break; 959e81bb599SAmlan Barua default: 9607536937bSAmlan Barua */ 9616971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 9626971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 9636971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9646971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9656971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9666971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 967b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9687536937bSAmlan Barua // break; 9697536937bSAmlan Barua // } 970e81bb599SAmlan Barua } 971e81bb599SAmlan Barua else{ 972e81bb599SAmlan Barua 973e81bb599SAmlan Barua switch (ndim){ 974e81bb599SAmlan Barua case 1: 9756a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 9765b3e41ffSAmlan Barua break; 9775b3e41ffSAmlan Barua case 2: 978*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 979*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 980*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 981*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 982*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 983*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 986*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 987*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 988*bd59e6c6SAmlan Barua break; 989*bd59e6c6SAmlan Barua #else 9905b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9915b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9925b3e41ffSAmlan Barua 9935b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9945b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9955b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 9965b3e41ffSAmlan Barua 9975b3e41ffSAmlan Barua if (dim[1]%2==0) 9985b3e41ffSAmlan Barua NM = dim[1]+2; 9995b3e41ffSAmlan Barua else 10005b3e41ffSAmlan Barua NM = dim[1]+1; 10015b3e41ffSAmlan Barua 10025b3e41ffSAmlan Barua 10035b3e41ffSAmlan Barua 10045b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 10055b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 10065b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10075b3e41ffSAmlan Barua tempindx1 = i*NM + j; 10085b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10095b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1010cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1011cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1012cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1013cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 10145b3e41ffSAmlan Barua } 10155b3e41ffSAmlan Barua } 10165b3e41ffSAmlan Barua 10175b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10185b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10195b3e41ffSAmlan Barua 10205b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10215b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10225b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10235b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1024b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1025b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1026b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1027b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10285b3e41ffSAmlan Barua break; 1029*bd59e6c6SAmlan Barua #endif 10305b3e41ffSAmlan Barua case 3: 1031*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1032*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1033*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1034*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1035*bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1036*bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1037*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1038*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1041*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1042*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1043*bd59e6c6SAmlan Barua break; 1044*bd59e6c6SAmlan Barua #else 1045*bd59e6c6SAmlan Barua 104651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 104751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 104851d1eed7SAmlan Barua 104951d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 105051d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 105151d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 105251d1eed7SAmlan Barua 105351d1eed7SAmlan Barua if (dim[2]%2==0) 105451d1eed7SAmlan Barua NM = dim[2]+2; 105551d1eed7SAmlan Barua else 105651d1eed7SAmlan Barua NM = dim[2]+1; 105751d1eed7SAmlan Barua 105851d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 105951d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 106051d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 106151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 106251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 106351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 106451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 106551d1eed7SAmlan Barua } 106651d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 106751d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 106851d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 106951d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 107051d1eed7SAmlan Barua } 107151d1eed7SAmlan Barua } 107251d1eed7SAmlan Barua 107351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 107451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 107551d1eed7SAmlan Barua 107651d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 107751d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 107851d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 107951d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1080b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1081b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1082b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1083b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10845b3e41ffSAmlan Barua break; 1085*bd59e6c6SAmlan Barua #endif 10865b3e41ffSAmlan Barua default: 1087*bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1088*bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1089*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1090*bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1091*bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1092*bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1093*bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1094*bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095*bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096*bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1097*bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1098*bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1099*bd59e6c6SAmlan Barua #else 1100ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1101ba6e06d0SAmlan Barua printf("The value of temp is %ld\n",temp); 1102ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1103ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1104ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1105ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1106ba6e06d0SAmlan Barua 1107ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1108ba6e06d0SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1109ba6e06d0SAmlan Barua 1110ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1111ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1112ba6e06d0SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1113ba6e06d0SAmlan Barua 1114ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1115ba6e06d0SAmlan Barua NM = 2; 1116ba6e06d0SAmlan Barua else 1117ba6e06d0SAmlan Barua NM = 1; 1118ba6e06d0SAmlan Barua 1119ba6e06d0SAmlan Barua j = low; 1120ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1121ba6e06d0SAmlan Barua { 1122ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1123ba6e06d0SAmlan Barua indx2[i] = j; 1124ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1125ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1126ba6e06d0SAmlan Barua { j+=NM;} 1127ba6e06d0SAmlan Barua j++; 1128ba6e06d0SAmlan Barua } 1129ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1130ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1131ba6e06d0SAmlan Barua 1132ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1133ba6e06d0SAmlan Barua 1134ba6e06d0SAmlan Barua 1135ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1136ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1137ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1138ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1139b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1140b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1141b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1142b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1143ba6e06d0SAmlan Barua 11445b3e41ffSAmlan Barua break; 1145*bd59e6c6SAmlan Barua #endif 11465b3e41ffSAmlan Barua } 1147e81bb599SAmlan Barua } 11485b3e41ffSAmlan Barua return 0; 11495b3e41ffSAmlan Barua } 11505b3e41ffSAmlan Barua EXTERN_C_END 11515b3e41ffSAmlan Barua 11525b3e41ffSAmlan Barua EXTERN_C_BEGIN 11535b3e41ffSAmlan Barua #undef __FUNCT__ 11543c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11553c3a9c75SAmlan Barua /* 11563c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 11573c3a9c75SAmlan Barua via the external package FFTW 11583c3a9c75SAmlan Barua Options Database Keys: 11593c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11603c3a9c75SAmlan Barua 11613c3a9c75SAmlan Barua Level: intermediate 11623c3a9c75SAmlan Barua 11633c3a9c75SAmlan Barua */ 11643c3a9c75SAmlan Barua 1165b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1166b2b77a04SHong Zhang { 1167b2b77a04SHong Zhang PetscErrorCode ierr; 1168b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1169b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1170b2b77a04SHong Zhang Mat_FFTW *fftw; 1171b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1172b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1173b2b77a04SHong Zhang PetscBool flg; 1174b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 117511902ff2SHong Zhang PetscMPIInt size,rank; 1176b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 11774a16a297SSean Farley ptrdiff_t local_n1,local_1_start,local_1_end; 1178b2b77a04SHong Zhang 1179b2b77a04SHong Zhang PetscFunctionBegin; 1180b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 118111902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 118254dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1183c92418ccSAmlan Barua 118411902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 118511902ff2SHong Zhang pdim[0] = dim[0]; 118611902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 118711902ff2SHong Zhang { 11886882372aSHong Zhang partial_dim *= dim[ctr]; 118911902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11906882372aSHong Zhang } 11913c3a9c75SAmlan Barua 1192b2b77a04SHong Zhang if (size == 1) { 1193e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1194b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1195b2b77a04SHong Zhang n = N; 1196e81bb599SAmlan Barua #else 1197e81bb599SAmlan Barua int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1198e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1199e81bb599SAmlan Barua n = tot_dim; 1200e81bb599SAmlan Barua #endif 1201e81bb599SAmlan Barua 1202b2b77a04SHong Zhang } else { 1203b223cc97SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1204b2b77a04SHong Zhang switch (ndim){ 1205b2b77a04SHong Zhang case 1: 12063c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12073c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1208e5d7f247SAmlan Barua #else 12096882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 12106882372aSHong Zhang n = (PetscInt)local_n0; 12116882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1212e5d7f247SAmlan Barua #endif 12133c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1214b2b77a04SHong Zhang break; 1215b2b77a04SHong Zhang case 2: 12165b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1217b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1218b2b77a04SHong Zhang /* 1219b2b77a04SHong Zhang PetscMPIInt rank; 1220b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1221b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1222b2b77a04SHong Zhang */ 1223b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1224b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12255b3e41ffSAmlan Barua #else 12265b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 12275b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 12285b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12295b3e41ffSAmlan Barua #endif 1230b2b77a04SHong Zhang break; 1231b2b77a04SHong Zhang case 3: 123211902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 123351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 123451d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 12356882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12366882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 123751d1eed7SAmlan Barua #else 123851d1eed7SAmlan Barua printf("The code comes here\n"); 123951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 124051d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 124151d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 124251d1eed7SAmlan Barua #endif 1243b2b77a04SHong Zhang break; 1244b2b77a04SHong Zhang default: 1245b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 124611902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1247c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 124811902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 12496882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 125011902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 12516882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1252b3a17365SAmlan Barua #else 1253b3a17365SAmlan Barua temp = pdim[ndim-1]; 1254b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 1255b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1256b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1257b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1258b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1259b3a17365SAmlan Barua pdim[ndim-1] = temp; 1260b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1261b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1262b3a17365SAmlan Barua #endif 1263b2b77a04SHong Zhang break; 1264b2b77a04SHong Zhang } 1265b2b77a04SHong Zhang } 1266b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1267b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1268b2b77a04SHong Zhang fft->data = (void*)fftw; 1269b2b77a04SHong Zhang 1270b2b77a04SHong Zhang fft->n = n; 1271c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1272e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1273c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1274b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1275c92418ccSAmlan Barua 1276b2b77a04SHong Zhang fftw->p_forward = 0; 1277b2b77a04SHong Zhang fftw->p_backward = 0; 1278b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1279b2b77a04SHong Zhang 1280b2b77a04SHong Zhang if (size == 1){ 1281b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1282b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1283b2b77a04SHong Zhang } else { 1284b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1285b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1286b2b77a04SHong Zhang } 1287b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 12886a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1289b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 1290b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12913c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 12925b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1293b2b77a04SHong Zhang 1294b2b77a04SHong Zhang /* get runtime options */ 1295b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1296b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1297b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1298b2b77a04SHong Zhang PetscOptionsEnd(); 1299b2b77a04SHong Zhang PetscFunctionReturn(0); 1300b2b77a04SHong Zhang } 1301b2b77a04SHong Zhang EXTERN_C_END 13023c3a9c75SAmlan Barua 13033c3a9c75SAmlan Barua 13043c3a9c75SAmlan Barua 13053c3a9c75SAmlan Barua 1306