1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC" 305c92418ccSAmlan Barua /* 3064be45526SHong Zhang - Get Vectors(s) compatible with matrix, i.e. with the 307c92418ccSAmlan Barua parallel layout determined by FFTW-1D 308c92418ccSAmlan Barua 309c92418ccSAmlan Barua */ 3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311c92418ccSAmlan Barua { 312c92418ccSAmlan Barua PetscErrorCode ierr; 313c92418ccSAmlan Barua PetscMPIInt size,rank; 314c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 315c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 316c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317c92418ccSAmlan Barua PetscInt N=fft->N; 318c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 319c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 321c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 323ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 324c92418ccSAmlan Barua 325c92418ccSAmlan Barua PetscFunctionBegin; 326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 327c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328c92418ccSAmlan Barua #endif 329c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331c92418ccSAmlan Barua if (size == 1){ 332c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333c92418ccSAmlan Barua } 334c92418ccSAmlan Barua else { 335c92418ccSAmlan Barua if (ndim>1){ 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337c92418ccSAmlan Barua else { 338c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340c92418ccSAmlan Barua if (fin) { 341c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout) { 346c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349c92418ccSAmlan Barua } 350c92418ccSAmlan Barua if (bout) { 351c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354c92418ccSAmlan Barua } 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fin){ 357c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (fout){ 360c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bout){ 363c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364c92418ccSAmlan Barua } 365c92418ccSAmlan Barua PetscFunctionReturn(0); 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua } 3703c3a9c75SAmlan Barua 3714f7415efSAmlan Barua #undef __FUNCT__ 3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3734be45526SHong Zhang /*@ 374b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3754f7415efSAmlan Barua parallel layout determined by FFTW 3764f7415efSAmlan Barua 3774f7415efSAmlan Barua Collective on Mat 3784f7415efSAmlan Barua 3794f7415efSAmlan Barua Input Parameter: 3804f7415efSAmlan Barua . mat - the matrix 3814f7415efSAmlan Barua 3824f7415efSAmlan Barua Output Parameter: 3834f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3844f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3854f7415efSAmlan Barua 3864f7415efSAmlan Barua Level: advanced 3874f7415efSAmlan Barua 3884f7415efSAmlan Barua .seealso: MatCreateFFTW() 3894be45526SHong Zhang @*/ 3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3914be45526SHong Zhang { 3924be45526SHong Zhang PetscErrorCode ierr; 3934be45526SHong Zhang PetscFunctionBegin; 3944be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3954be45526SHong Zhang PetscFunctionReturn(0); 3964be45526SHong Zhang } 3974be45526SHong Zhang 3984f7415efSAmlan Barua EXTERN_C_BEGIN 3994be45526SHong Zhang #undef __FUNCT__ 4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 4014be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4024f7415efSAmlan Barua { 4034f7415efSAmlan Barua PetscErrorCode ierr; 4044f7415efSAmlan Barua PetscMPIInt size,rank; 4054f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 4064f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4074f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4089496c9aeSAmlan Barua PetscInt N=fft->N; 4094f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 4104f7415efSAmlan Barua 4114f7415efSAmlan Barua PetscFunctionBegin; 4124f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4134f7415efSAmlan Barua PetscValidType(A,1); 4144f7415efSAmlan Barua 4154f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4164f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4174f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4184f7415efSAmlan Barua printf("Routine is getting called\n"); 4194f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4204f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4214f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4224f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4234f7415efSAmlan Barua #else 4249496c9aeSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 4254f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4264f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 4274f7415efSAmlan Barua #endif 4284f7415efSAmlan Barua } else { 4294f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4309496c9aeSAmlan Barua ptrdiff_t local_n1; 4319496c9aeSAmlan Barua fftw_complex *data_fout; 4329496c9aeSAmlan Barua ptrdiff_t local_1_start; 4339496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4349496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4359496c9aeSAmlan Barua #else 4364f7415efSAmlan Barua double *data_finr,*data_boutr; 4379496c9aeSAmlan Barua PetscInt n1,N1,vsize; 4389496c9aeSAmlan Barua ptrdiff_t temp; 4399496c9aeSAmlan Barua #endif 4409496c9aeSAmlan Barua 4414f7415efSAmlan Barua switch (ndim){ 4424f7415efSAmlan Barua case 1: 44357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 44457625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 44557625b84SAmlan Barua #else 4469496c9aeSAmlan Barua //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 44757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44857625b84SAmlan Barua if (fin) { 44957625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45057625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 45157625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 45257625b84SAmlan Barua } 45357625b84SAmlan Barua if (fout) { 45457625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45757625b84SAmlan Barua } 45857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45957625b84SAmlan Barua if (bout) { 46057625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 46157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 46257625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 46357625b84SAmlan Barua } 46457625b84SAmlan Barua break; 46557625b84SAmlan Barua #endif 4664f7415efSAmlan Barua case 2: 4674f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4684f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4694f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4704f7415efSAmlan Barua if (fin) { 4714f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4724f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4734f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4744f7415efSAmlan Barua } 4754f7415efSAmlan Barua if (bout) { 4764f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4774f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4784f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 481*c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]; 48257625b84SAmlan Barua if (fout) { 48357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4849496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48657625b84SAmlan Barua } 4874f7415efSAmlan Barua #else 4884f7415efSAmlan Barua /* Get local size */ 4894f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4904f7415efSAmlan Barua if (fin) { 4914f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4924f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4934f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4944f7415efSAmlan Barua } 4954f7415efSAmlan Barua if (fout) { 4964f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4974f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4984f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4994f7415efSAmlan Barua } 5004f7415efSAmlan Barua if (bout) { 5014f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5024f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5034f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5044f7415efSAmlan Barua } 5054f7415efSAmlan Barua #endif 5064f7415efSAmlan Barua break; 5074f7415efSAmlan Barua case 3: 5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5094f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5104f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5114f7415efSAmlan Barua if (fin) { 5124f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5134f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5144f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5154f7415efSAmlan Barua } 5164f7415efSAmlan Barua if (bout) { 5174f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5184f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5194f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5204f7415efSAmlan Barua } 521*c8a8a4f0SAmlan Barua //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 52257625b84SAmlan Barua if (fout) { 52357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52457625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52657625b84SAmlan Barua } 5274f7415efSAmlan Barua #else 5280c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5290c9b18e4SAmlan Barua if (fin) { 5300c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5310c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5320c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5330c9b18e4SAmlan Barua } 5340c9b18e4SAmlan Barua if (fout) { 5350c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5360c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5370c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5380c9b18e4SAmlan Barua } 5390c9b18e4SAmlan Barua if (bout) { 5400c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5410c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5420c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5430c9b18e4SAmlan Barua } 5444f7415efSAmlan Barua #endif 5454f7415efSAmlan Barua break; 5464f7415efSAmlan Barua default: 5474f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5484f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5494f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5504f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5514f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5524f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5534f7415efSAmlan Barua 5544f7415efSAmlan Barua if (fin) { 5554f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5564f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5574f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5584f7415efSAmlan Barua } 5594f7415efSAmlan Barua if (bout) { 5604f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5614f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5629496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5634f7415efSAmlan Barua } 564*c8a8a4f0SAmlan Barua //temp = fftw->partial_dim; 565*c8a8a4f0SAmlan Barua //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 566*c8a8a4f0SAmlan Barua //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 56757625b84SAmlan Barua if (fout) { 56857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 56957625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 57157625b84SAmlan Barua } 5724f7415efSAmlan Barua #else 5730c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5740c9b18e4SAmlan Barua if (fin) { 5750c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5760c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5770c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5780c9b18e4SAmlan Barua } 5790c9b18e4SAmlan Barua if (fout) { 5800c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5810c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5820c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5830c9b18e4SAmlan Barua } 5840c9b18e4SAmlan Barua if (bout) { 5850c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5860c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5870c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5880c9b18e4SAmlan Barua } 5894f7415efSAmlan Barua #endif 5904f7415efSAmlan Barua break; 5914f7415efSAmlan Barua } 5924f7415efSAmlan Barua } 5934f7415efSAmlan Barua PetscFunctionReturn(0); 5944f7415efSAmlan Barua } 5954f7415efSAmlan Barua EXTERN_C_END 5964f7415efSAmlan Barua 597c92418ccSAmlan Barua #undef __FUNCT__ 598b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 599b2b77a04SHong Zhang /* 600b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 601b2b77a04SHong Zhang parallel layout determined by FFTW 602b2b77a04SHong Zhang 603b2b77a04SHong Zhang Collective on Mat 604b2b77a04SHong Zhang 605b2b77a04SHong Zhang Input Parameter: 606b2b77a04SHong Zhang . mat - the matrix 607b2b77a04SHong Zhang 608b2b77a04SHong Zhang Output Parameter: 609b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 610b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 611b2b77a04SHong Zhang 612b2b77a04SHong Zhang Level: advanced 613b2b77a04SHong Zhang 614b2b77a04SHong Zhang .seealso: MatCreateFFTW() 615b2b77a04SHong Zhang */ 616b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 617b2b77a04SHong Zhang { 618b2b77a04SHong Zhang PetscErrorCode ierr; 619b2b77a04SHong Zhang PetscMPIInt size,rank; 620b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 621b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 622c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6239496c9aeSAmlan Barua PetscInt N=fft->N; 624e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 625b2b77a04SHong Zhang 626b2b77a04SHong Zhang PetscFunctionBegin; 627b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 628b2b77a04SHong Zhang PetscValidType(A,1); 629b2b77a04SHong Zhang 630b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 632b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 633e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 634b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 635b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 636e5d7f247SAmlan Barua #else 637e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 638e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 639e81bb599SAmlan Barua #endif 640b2b77a04SHong Zhang } else { /* mpi case */ 641b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6426882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 643b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 6449496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 64551d1eed7SAmlan Barua double *data_finr ; 646b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 6479496c9aeSAmlan Barua PetscInt vsize,n1,N1; 6489496c9aeSAmlan Barua #endif 6499496c9aeSAmlan Barua 650c92418ccSAmlan Barua // PetscInt ctr; 651c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 652c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 653c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 65411902ff2SHong Zhang 655c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 656f76f76beSAmlan Barua // {k 657c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 658c92418ccSAmlan Barua // } 659b2b77a04SHong Zhang 660f76f76beSAmlan Barua 661f76f76beSAmlan Barua 662b2b77a04SHong Zhang switch (ndim){ 663b2b77a04SHong Zhang case 1: 6646882372aSHong Zhang /* Get local size */ 665e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 666c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6676882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6689496c9aeSAmlan Barua printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 6696882372aSHong Zhang if (fin) { 6706882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6716882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6726882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6736882372aSHong Zhang } 6746882372aSHong Zhang if (fout) { 6756882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 67657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6776882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6786882372aSHong Zhang } 679b2b77a04SHong Zhang break; 680b2b77a04SHong Zhang case 2: 6813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6823c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6833c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6843c3a9c75SAmlan Barua if (fin) { 6853c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 68654dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6873c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 68809dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6893c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6903c3a9c75SAmlan Barua } 69157625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 69257625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 69357625b84SAmlan Barua printf("The values are %ld\n",local_n1); 6943c3a9c75SAmlan Barua if (fout) { 69509dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 69609dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6973c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6983c3a9c75SAmlan Barua } 699f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 7003c3a9c75SAmlan Barua 7013c3a9c75SAmlan Barua #else 702b2b77a04SHong Zhang /* Get local size */ 70364657d84SAmlan Barua //printf("Hope this does not come here"); 704b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 705b2b77a04SHong Zhang if (fin) { 706b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 707b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 708b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 709b2b77a04SHong Zhang } 710b2b77a04SHong Zhang if (fout) { 711b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 712b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 713b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 714b2b77a04SHong Zhang } 71564657d84SAmlan Barua //printf("Hope this does not come here"); 7163c3a9c75SAmlan Barua #endif 717b2b77a04SHong Zhang break; 718b2b77a04SHong Zhang case 3: 7196882372aSHong Zhang /* Get local size */ 7203c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 72151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 72251d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 72351d1eed7SAmlan Barua if (fin) { 72451d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 72551d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 72651d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 72751d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 72851d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 72951d1eed7SAmlan Barua } 73057625b84SAmlan Barua printf("The value is %ld",local_n1); 73157625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 73251d1eed7SAmlan Barua if (fout) { 73351d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 73451d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 73551d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 73651d1eed7SAmlan Barua } 73751d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 73851d1eed7SAmlan Barua 73951d1eed7SAmlan Barua 7403c3a9c75SAmlan Barua #else 7416882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 74211902ff2SHong Zhang // printf("The quantity n is %d",n); 7436882372aSHong Zhang if (fin) { 7446882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7456882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7466882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7476882372aSHong Zhang } 7486882372aSHong Zhang if (fout) { 7496882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7506882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7516882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7526882372aSHong Zhang } 7533c3a9c75SAmlan Barua #endif 754b2b77a04SHong Zhang break; 755b2b77a04SHong Zhang default: 7566882372aSHong Zhang /* Get local size */ 7573c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 758b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 759b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 760b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 761b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 762b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 763b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 764b3a17365SAmlan Barua if (fin) { 765b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 766b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 767b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 768b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 769b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 770b3a17365SAmlan Barua } 77157625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 77257625b84SAmlan Barua temp = fftw->partial_dim; 77357625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 77457625b84SAmlan Barua 77557625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 776b3a17365SAmlan Barua if (fout) { 777b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 77857625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 779b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 780b3a17365SAmlan Barua } 781b3a17365SAmlan Barua 7823c3a9c75SAmlan Barua #else 783c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78411902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 78511902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 78611902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 78711902ff2SHong Zhang // for(i=0;i<ndim;i++) 78811902ff2SHong Zhang // { 78911902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 79011902ff2SHong Zhang // } 79111902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 79211902ff2SHong Zhang // printf("The quantity n is %d",n); 7936882372aSHong Zhang if (fin) { 7946882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7956882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7966882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7976882372aSHong Zhang } 7986882372aSHong Zhang if (fout) { 7996882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8006882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 8016882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 8026882372aSHong Zhang } 8033c3a9c75SAmlan Barua #endif 804b2b77a04SHong Zhang break; 805b2b77a04SHong Zhang } 806b2b77a04SHong Zhang } 80754dd5118SAmlan Barua // if (fin){ 80854dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 80954dd5118SAmlan Barua // } 81054dd5118SAmlan Barua // if (fout){ 81154dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 81254dd5118SAmlan Barua // } 813b2b77a04SHong Zhang PetscFunctionReturn(0); 814b2b77a04SHong Zhang } 815b2b77a04SHong Zhang 816b2b77a04SHong Zhang #undef __FUNCT__ 8173c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 8183c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 8193c3a9c75SAmlan Barua { 8203c3a9c75SAmlan Barua PetscErrorCode ierr; 8213c3a9c75SAmlan Barua PetscFunctionBegin; 8223c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8233c3a9c75SAmlan Barua PetscFunctionReturn(0); 8243c3a9c75SAmlan Barua } 82554efbe56SHong Zhang 826b2b77a04SHong Zhang /* 8279496c9aeSAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 8289496c9aeSAmlan Barua parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 8299496c9aeSAmlan Barua changing dimension. 8303c3a9c75SAmlan Barua Input A, x, y 8313c3a9c75SAmlan Barua A - FFTW matrix 83254dd5118SAmlan Barua x - user data 833b2b77a04SHong Zhang Options Database Keys: 834b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 835b2b77a04SHong Zhang 836b2b77a04SHong Zhang Level: intermediate 837b2b77a04SHong Zhang 838b2b77a04SHong Zhang */ 8393c3a9c75SAmlan Barua 8403c3a9c75SAmlan Barua EXTERN_C_BEGIN 8413c3a9c75SAmlan Barua #undef __FUNCT__ 8423c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 8433c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8443c3a9c75SAmlan Barua { 8453c3a9c75SAmlan Barua PetscErrorCode ierr; 8463c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8473c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8483c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8499496c9aeSAmlan Barua PetscInt N=fft->N; 850b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 8519496c9aeSAmlan Barua PetscInt low; 8529496c9aeSAmlan Barua PetscInt rank,size; 8533c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 8549496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8553c3a9c75SAmlan Barua VecScatter vecscat; 8563c3a9c75SAmlan Barua IS list1,list2; 8579496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8589496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 8599496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 8609496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 8619496c9aeSAmlan Barua ptrdiff_t temp; 8629496c9aeSAmlan Barua #endif 8633c3a9c75SAmlan Barua 864f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 865f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8663c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 8673c3a9c75SAmlan Barua 868e81bb599SAmlan Barua if (size==1) 869e81bb599SAmlan Barua { 8707536937bSAmlan Barua /* switch (ndim){ 871e81bb599SAmlan Barua case 1: 872e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 873e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 874e81bb599SAmlan Barua { 875e81bb599SAmlan Barua indx1[i] = i; 876e81bb599SAmlan Barua } 877e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 878e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 879e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 882b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 883b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 884e81bb599SAmlan Barua break; 885e81bb599SAmlan Barua 886e81bb599SAmlan Barua case 2: 887e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 888e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 889e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 890e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 891e81bb599SAmlan Barua } 892e81bb599SAmlan Barua } 893e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 894e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 895e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 898b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 899b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 900e81bb599SAmlan Barua break; 901e81bb599SAmlan Barua case 3: 902e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 903e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 904e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 905e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 906e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 907e81bb599SAmlan Barua } 908e81bb599SAmlan Barua } 909e81bb599SAmlan Barua } 910e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 911e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 912e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 915b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 916b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 917e81bb599SAmlan Barua break; 918e81bb599SAmlan Barua default: 9197536937bSAmlan Barua */ 9209496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 9216971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9226971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9236971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9246971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 925b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9267536937bSAmlan Barua // break; 9277536937bSAmlan Barua // } 928e81bb599SAmlan Barua } 929e81bb599SAmlan Barua 930e81bb599SAmlan Barua else{ 931e81bb599SAmlan Barua 9323c3a9c75SAmlan Barua switch (ndim){ 9333c3a9c75SAmlan Barua case 1: 93464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 93564657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 93664657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 93764657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 93864657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 93964657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94064657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94164657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 94264657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 94364657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 94464657d84SAmlan Barua #else 945e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 94664657d84SAmlan Barua #endif 9473c3a9c75SAmlan Barua break; 9483c3a9c75SAmlan Barua case 2: 949bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 950bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 951bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 952bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 953bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 954bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 957bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 958bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 959bd59e6c6SAmlan Barua #else 9603c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9613c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9629496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9639496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9649496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9659496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9663c3a9c75SAmlan Barua 9673c3a9c75SAmlan Barua if (dim[1]%2==0) 9683c3a9c75SAmlan Barua NM = dim[1]+2; 9693c3a9c75SAmlan Barua else 9703c3a9c75SAmlan Barua NM = dim[1]+1; 9713c3a9c75SAmlan Barua 9723c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9733c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9743c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9753c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9765b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9773c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9783c3a9c75SAmlan Barua } 9793c3a9c75SAmlan Barua } 9803c3a9c75SAmlan Barua 9813c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9823c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9833c3a9c75SAmlan Barua 984f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 985f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 988b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 989b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 990b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 991b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua #endif 9939496c9aeSAmlan Barua break; 9943c3a9c75SAmlan Barua 9953c3a9c75SAmlan Barua case 3: 996bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 997bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 998bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 999bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1000bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1001bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1002bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1003bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1006bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1007bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1008bd59e6c6SAmlan Barua #else 100951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 101051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 101151d1eed7SAmlan Barua 10129496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 10139496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 10149496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10159496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 101651d1eed7SAmlan Barua 101751d1eed7SAmlan Barua if (dim[2]%2==0) 101851d1eed7SAmlan Barua NM = dim[2]+2; 101951d1eed7SAmlan Barua else 102051d1eed7SAmlan Barua NM = dim[2]+1; 102151d1eed7SAmlan Barua 102251d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 102351d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 102451d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 102551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 102651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 102751d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 102851d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 102951d1eed7SAmlan Barua } 103051d1eed7SAmlan Barua } 103151d1eed7SAmlan Barua } 103251d1eed7SAmlan Barua 103351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 103451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 103551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 103651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1039b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1040b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1041b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1042b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1043bd59e6c6SAmlan Barua #endif 10449496c9aeSAmlan Barua break; 10453c3a9c75SAmlan Barua 10463c3a9c75SAmlan Barua default: 1047bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1048bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1049bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1050bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1051bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1052bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057bd59e6c6SAmlan Barua #else 1058e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1059e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1060e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1061e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1062e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1063e5d7f247SAmlan Barua 1064e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1065e5d7f247SAmlan Barua 1066e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1067e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1068e5d7f247SAmlan Barua 1069e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1070ba6e06d0SAmlan Barua NM = 2; 1071e5d7f247SAmlan Barua else 1072ba6e06d0SAmlan Barua NM = 1; 1073e5d7f247SAmlan Barua 10746971673cSAmlan Barua j = low; 10756971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 10766971673cSAmlan Barua { 10776971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 10786971673cSAmlan Barua indx2[i] = j; 10796971673cSAmlan Barua if (k%dim[ndim-1]==0) 10806971673cSAmlan Barua { j+=NM;} 10816971673cSAmlan Barua j++; 10826971673cSAmlan Barua } 10836971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10846971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10856971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 10866971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10876971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10886971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1089b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1090b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1091b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1092b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1093bd59e6c6SAmlan Barua #endif 10949496c9aeSAmlan Barua break; 10953c3a9c75SAmlan Barua } 1096e81bb599SAmlan Barua } 10973c3a9c75SAmlan Barua 10983c3a9c75SAmlan Barua return 0; 10993c3a9c75SAmlan Barua } 11003c3a9c75SAmlan Barua EXTERN_C_END 11013c3a9c75SAmlan Barua 11023c3a9c75SAmlan Barua #undef __FUNCT__ 11033c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 11043c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 11053c3a9c75SAmlan Barua { 11063c3a9c75SAmlan Barua PetscErrorCode ierr; 11073c3a9c75SAmlan Barua PetscFunctionBegin; 11083c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 11093c3a9c75SAmlan Barua PetscFunctionReturn(0); 11103c3a9c75SAmlan Barua } 111154efbe56SHong Zhang 11125b3e41ffSAmlan Barua /* 11135b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 11145b3e41ffSAmlan Barua Input A, x, y 11155b3e41ffSAmlan Barua A - FFTW matrix 11165b3e41ffSAmlan Barua x - FFTW vector 11175b3e41ffSAmlan Barua y - PETSc vector that user can read 11185b3e41ffSAmlan Barua Options Database Keys: 11195b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11205b3e41ffSAmlan Barua 11215b3e41ffSAmlan Barua Level: intermediate 11225b3e41ffSAmlan Barua 11233c3a9c75SAmlan Barua */ 11243c3a9c75SAmlan Barua 11253c3a9c75SAmlan Barua EXTERN_C_BEGIN 11263c3a9c75SAmlan Barua #undef __FUNCT__ 11275b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 11285b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 11295b3e41ffSAmlan Barua { 11305b3e41ffSAmlan Barua PetscErrorCode ierr; 11315b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 11325b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11335b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 11349496c9aeSAmlan Barua PetscInt N=fft->N; 1135b3655f67SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 11369496c9aeSAmlan Barua PetscInt low; 11379496c9aeSAmlan Barua PetscInt rank,size; 11385b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 11399496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11405b3e41ffSAmlan Barua VecScatter vecscat; 11415b3e41ffSAmlan Barua IS list1,list2; 11429496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11439496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 11449496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 11459496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 11469496c9aeSAmlan Barua ptrdiff_t temp; 11479496c9aeSAmlan Barua #endif 11489496c9aeSAmlan Barua 11495b3e41ffSAmlan Barua 11505b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11515b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1152b3655f67SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 11535b3e41ffSAmlan Barua 1154e81bb599SAmlan Barua if (size==1){ 11557536937bSAmlan Barua /* 11565b3e41ffSAmlan Barua switch (ndim){ 11575b3e41ffSAmlan Barua case 1: 1158e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1159e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1160e81bb599SAmlan Barua { 1161e81bb599SAmlan Barua indx1[i] = i; 1162e81bb599SAmlan Barua } 1163e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1164e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1165e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1168b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1169b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1170e81bb599SAmlan Barua break; 1171e81bb599SAmlan Barua 1172e81bb599SAmlan Barua case 2: 1173e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1174e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1175e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1176e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1177e81bb599SAmlan Barua } 1178e81bb599SAmlan Barua } 1179e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1180e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1181e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1183e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1184b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1185b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1186e81bb599SAmlan Barua break; 1187e81bb599SAmlan Barua case 3: 1188e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1189e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1190e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1191e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1192e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1193e81bb599SAmlan Barua } 1194e81bb599SAmlan Barua } 1195e81bb599SAmlan Barua } 1196e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1197e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1198e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1201b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1202b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1203e81bb599SAmlan Barua break; 1204e81bb599SAmlan Barua default: 12057536937bSAmlan Barua */ 1206b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 12076971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 12086971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 12096971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12106971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12116971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1212b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 12137536937bSAmlan Barua // break; 12147536937bSAmlan Barua // } 1215e81bb599SAmlan Barua } 1216e81bb599SAmlan Barua else{ 1217e81bb599SAmlan Barua 1218e81bb599SAmlan Barua switch (ndim){ 1219e81bb599SAmlan Barua case 1: 122064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 122164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 12229496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 12239496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 122464657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 122564657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 122664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 122764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 123064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 123164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 123264657d84SAmlan Barua #else 12336a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 123464657d84SAmlan Barua #endif 12355b3e41ffSAmlan Barua break; 12365b3e41ffSAmlan Barua case 2: 1237bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1238bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1239bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1240bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1241bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1242bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1245bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1246bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1247bd59e6c6SAmlan Barua #else 12485b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12495b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12505b3e41ffSAmlan Barua 12519496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 12529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 12535b3e41ffSAmlan Barua 12545b3e41ffSAmlan Barua if (dim[1]%2==0) 12555b3e41ffSAmlan Barua NM = dim[1]+2; 12565b3e41ffSAmlan Barua else 12575b3e41ffSAmlan Barua NM = dim[1]+1; 12585b3e41ffSAmlan Barua 12595b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12605b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12615b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12625b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12635b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12645b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 12655b3e41ffSAmlan Barua } 12665b3e41ffSAmlan Barua } 12675b3e41ffSAmlan Barua 12685b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 12695b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 12705b3e41ffSAmlan Barua 12715b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 12725b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12735b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12745b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1275b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1276b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1277b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1278b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1279bd59e6c6SAmlan Barua #endif 12809496c9aeSAmlan Barua break; 12815b3e41ffSAmlan Barua case 3: 1282bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1283bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1284bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1285bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1286bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1287bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1288bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1289bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1290bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1291bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1292bd59e6c6SAmlan Barua #else 1293bd59e6c6SAmlan Barua 129451d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 129551d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 129651d1eed7SAmlan Barua 12979496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12989496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12999496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 13009496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 130151d1eed7SAmlan Barua 130251d1eed7SAmlan Barua if (dim[2]%2==0) 130351d1eed7SAmlan Barua NM = dim[2]+2; 130451d1eed7SAmlan Barua else 130551d1eed7SAmlan Barua NM = dim[2]+1; 130651d1eed7SAmlan Barua 130751d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 130851d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 130951d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 131051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 131151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 131251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 131351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 131451d1eed7SAmlan Barua } 131551d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 131651d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 131751d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 131851d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 131951d1eed7SAmlan Barua } 132051d1eed7SAmlan Barua } 132151d1eed7SAmlan Barua 132251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 132351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 132451d1eed7SAmlan Barua 132551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 132651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 132751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 132851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1329b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1330b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1331b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1332b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1333bd59e6c6SAmlan Barua #endif 13349496c9aeSAmlan Barua break; 13355b3e41ffSAmlan Barua default: 1336bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1337bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1338bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1339bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1340bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1341bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1342bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1343bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1344bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1346bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1347bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1348bd59e6c6SAmlan Barua #else 1349ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1350ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1351ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1352ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1353ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1354ba6e06d0SAmlan Barua 1355ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1356ba6e06d0SAmlan Barua 1357ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1358ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1359ba6e06d0SAmlan Barua 1360ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1361ba6e06d0SAmlan Barua NM = 2; 1362ba6e06d0SAmlan Barua else 1363ba6e06d0SAmlan Barua NM = 1; 1364ba6e06d0SAmlan Barua 1365ba6e06d0SAmlan Barua j = low; 1366ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1367ba6e06d0SAmlan Barua { 1368ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1369ba6e06d0SAmlan Barua indx2[i] = j; 1370ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1371ba6e06d0SAmlan Barua { j+=NM;} 1372ba6e06d0SAmlan Barua j++; 1373ba6e06d0SAmlan Barua } 1374ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1375ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1376ba6e06d0SAmlan Barua 1377ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1378ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1380ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1381b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1382b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1383b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1384b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1385bd59e6c6SAmlan Barua #endif 13869496c9aeSAmlan Barua break; 13875b3e41ffSAmlan Barua } 1388e81bb599SAmlan Barua } 13895b3e41ffSAmlan Barua return 0; 13905b3e41ffSAmlan Barua } 13915b3e41ffSAmlan Barua EXTERN_C_END 13925b3e41ffSAmlan Barua 13935b3e41ffSAmlan Barua EXTERN_C_BEGIN 13945b3e41ffSAmlan Barua #undef __FUNCT__ 13953c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 13963c3a9c75SAmlan Barua /* 13973c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 13983c3a9c75SAmlan Barua via the external package FFTW 13993c3a9c75SAmlan Barua Options Database Keys: 14003c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 14013c3a9c75SAmlan Barua 14023c3a9c75SAmlan Barua Level: intermediate 14033c3a9c75SAmlan Barua 14043c3a9c75SAmlan Barua */ 14053c3a9c75SAmlan Barua 1406b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1407b2b77a04SHong Zhang { 1408b2b77a04SHong Zhang PetscErrorCode ierr; 1409b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1410b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1411b2b77a04SHong Zhang Mat_FFTW *fftw; 1412b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1413b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1414b2b77a04SHong Zhang PetscBool flg; 14159496c9aeSAmlan Barua PetscInt p_flag,partial_dim=1,ctr,n1; 141611902ff2SHong Zhang PetscMPIInt size,rank; 14179496c9aeSAmlan Barua ptrdiff_t *pdim; 14189496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 14199496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14209496c9aeSAmlan Barua ptrdiff_t temp; 14219496c9aeSAmlan Barua PetscInt N1; 14229496c9aeSAmlan Barua #endif 14239496c9aeSAmlan Barua 1424b2b77a04SHong Zhang 1425b2b77a04SHong Zhang PetscFunctionBegin; 1426b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 142711902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 142854dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1429c92418ccSAmlan Barua 143011902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 143111902ff2SHong Zhang pdim[0] = dim[0]; 143211902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 143311902ff2SHong Zhang { 14346882372aSHong Zhang partial_dim *= dim[ctr]; 143511902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14366882372aSHong Zhang } 14373c3a9c75SAmlan Barua 1438b2b77a04SHong Zhang if (size == 1) { 1439e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1440b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1441b2b77a04SHong Zhang n = N; 1442e81bb599SAmlan Barua #else 14439496c9aeSAmlan Barua PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1444e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1445e81bb599SAmlan Barua n = tot_dim; 1446e81bb599SAmlan Barua #endif 1447e81bb599SAmlan Barua 1448b2b77a04SHong Zhang } else { 14499496c9aeSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1450b2b77a04SHong Zhang switch (ndim){ 1451b2b77a04SHong Zhang case 1: 14523c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14533c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1454e5d7f247SAmlan Barua #else 14559496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 14566882372aSHong Zhang n = (PetscInt)local_n0; 14579496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 14589496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1459e5d7f247SAmlan Barua #endif 1460b2b77a04SHong Zhang break; 1461b2b77a04SHong Zhang case 2: 14625b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1463b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1464b2b77a04SHong Zhang /* 1465b2b77a04SHong Zhang PetscMPIInt rank; 1466b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1467b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1468b2b77a04SHong Zhang */ 1469b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1470b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 14715b3e41ffSAmlan Barua #else 14725b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 14735b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1474*c8a8a4f0SAmlan Barua // n1 = 2*(PetscInt)local_n1*(dim[0]); 1475*c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1476*c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 14775b3e41ffSAmlan Barua #endif 1478b2b77a04SHong Zhang break; 1479b2b77a04SHong Zhang case 3: 148051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 14826882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 14836882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 148451d1eed7SAmlan Barua #else 148551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 148651d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1487*c8a8a4f0SAmlan Barua // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1488*c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1489*c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 149051d1eed7SAmlan Barua #endif 1491b2b77a04SHong Zhang break; 1492b2b77a04SHong Zhang default: 1493b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 149411902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 14956882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 14966882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1497b3a17365SAmlan Barua #else 1498b3a17365SAmlan Barua temp = pdim[ndim-1]; 1499b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1500b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1501b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1502b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1503b3a17365SAmlan Barua pdim[ndim-1] = temp; 1504*c8a8a4f0SAmlan Barua // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1505*c8a8a4f0SAmlan Barua // n1 = 2*local_n1*temp; 1506*c8a8a4f0SAmlan Barua // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1507*c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1508b3a17365SAmlan Barua #endif 1509b2b77a04SHong Zhang break; 1510b2b77a04SHong Zhang } 1511b2b77a04SHong Zhang } 1512b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1513b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1514b2b77a04SHong Zhang fft->data = (void*)fftw; 1515b2b77a04SHong Zhang 1516b2b77a04SHong Zhang fft->n = n; 1517c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1518e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1519c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1520b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1521c92418ccSAmlan Barua 1522b2b77a04SHong Zhang fftw->p_forward = 0; 1523b2b77a04SHong Zhang fftw->p_backward = 0; 1524b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1525b2b77a04SHong Zhang 1526b2b77a04SHong Zhang if (size == 1){ 1527b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1528b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1529b2b77a04SHong Zhang } else { 1530b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1531b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1532b2b77a04SHong Zhang } 1533b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15346a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 15354be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1536b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 15374be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 15383c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15395b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1540b2b77a04SHong Zhang 1541b2b77a04SHong Zhang /* get runtime options */ 1542b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1543b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1544b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1545b2b77a04SHong Zhang PetscOptionsEnd(); 1546b2b77a04SHong Zhang PetscFunctionReturn(0); 1547b2b77a04SHong Zhang } 1548b2b77a04SHong Zhang EXTERN_C_END 15493c3a9c75SAmlan Barua 15503c3a9c75SAmlan Barua 15513c3a9c75SAmlan Barua 15523c3a9c75SAmlan Barua 1553