1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 274be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 3044be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC" 305c92418ccSAmlan Barua /* 3064be45526SHong Zhang - Get Vectors(s) compatible with matrix, i.e. with the 307c92418ccSAmlan Barua parallel layout determined by FFTW-1D 308c92418ccSAmlan Barua 309c92418ccSAmlan Barua */ 3104be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311c92418ccSAmlan Barua { 312c92418ccSAmlan Barua PetscErrorCode ierr; 313c92418ccSAmlan Barua PetscMPIInt size,rank; 314c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 315c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 316c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317c92418ccSAmlan Barua PetscInt N=fft->N; 318c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 319c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 321c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 323ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 324c92418ccSAmlan Barua 325c92418ccSAmlan Barua PetscFunctionBegin; 326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 327c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328c92418ccSAmlan Barua #endif 329c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331c92418ccSAmlan Barua if (size == 1){ 332c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333c92418ccSAmlan Barua } 334c92418ccSAmlan Barua else { 335c92418ccSAmlan Barua if (ndim>1){ 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337c92418ccSAmlan Barua else { 338c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340c92418ccSAmlan Barua if (fin) { 341c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout) { 346c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349c92418ccSAmlan Barua } 350c92418ccSAmlan Barua if (bout) { 351c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354c92418ccSAmlan Barua } 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fin){ 357c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (fout){ 360c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bout){ 363c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364c92418ccSAmlan Barua } 365c92418ccSAmlan Barua PetscFunctionReturn(0); 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua } 3703c3a9c75SAmlan Barua 3714f7415efSAmlan Barua #undef __FUNCT__ 3724be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3734be45526SHong Zhang /*@ 374b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3754f7415efSAmlan Barua parallel layout determined by FFTW 3764f7415efSAmlan Barua 3774f7415efSAmlan Barua Collective on Mat 3784f7415efSAmlan Barua 3794f7415efSAmlan Barua Input Parameter: 3804f7415efSAmlan Barua . mat - the matrix 3814f7415efSAmlan Barua 3824f7415efSAmlan Barua Output Parameter: 3834f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3844f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3854f7415efSAmlan Barua 3864f7415efSAmlan Barua Level: advanced 3874f7415efSAmlan Barua 3884f7415efSAmlan Barua .seealso: MatCreateFFTW() 3894be45526SHong Zhang @*/ 3904be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3914be45526SHong Zhang { 3924be45526SHong Zhang PetscErrorCode ierr; 3934be45526SHong Zhang PetscFunctionBegin; 3944be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3954be45526SHong Zhang PetscFunctionReturn(0); 3964be45526SHong Zhang } 3974be45526SHong Zhang 3984f7415efSAmlan Barua EXTERN_C_BEGIN 3994be45526SHong Zhang #undef __FUNCT__ 4004be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 4014be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4024f7415efSAmlan Barua { 4034f7415efSAmlan Barua PetscErrorCode ierr; 4044f7415efSAmlan Barua PetscMPIInt size,rank; 4054f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 4064f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4074f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 408*9496c9aeSAmlan Barua PetscInt N=fft->N; 4094f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 4104f7415efSAmlan Barua 4114f7415efSAmlan Barua PetscFunctionBegin; 4124f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4134f7415efSAmlan Barua PetscValidType(A,1); 4144f7415efSAmlan Barua 4154f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4164f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4174f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4184f7415efSAmlan Barua printf("Routine is getting called\n"); 4194f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4204f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4214f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4224f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4234f7415efSAmlan Barua #else 424*9496c9aeSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 4254f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4264f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 4274f7415efSAmlan Barua #endif 4284f7415efSAmlan Barua } else { 4294f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 430*9496c9aeSAmlan Barua ptrdiff_t local_n1; 431*9496c9aeSAmlan Barua fftw_complex *data_fout; 432*9496c9aeSAmlan Barua ptrdiff_t local_1_start; 433*9496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 434*9496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 435*9496c9aeSAmlan Barua #else 4364f7415efSAmlan Barua double *data_finr,*data_boutr; 437*9496c9aeSAmlan Barua PetscInt n1,N1,vsize; 438*9496c9aeSAmlan Barua ptrdiff_t temp; 439*9496c9aeSAmlan Barua #endif 440*9496c9aeSAmlan Barua 4414f7415efSAmlan Barua switch (ndim){ 4424f7415efSAmlan Barua case 1: 44357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 44457625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 44557625b84SAmlan Barua #else 446*9496c9aeSAmlan Barua //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 44757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44857625b84SAmlan Barua if (fin) { 44957625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45057625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 45157625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 45257625b84SAmlan Barua } 45357625b84SAmlan Barua if (fout) { 45457625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45757625b84SAmlan Barua } 45857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45957625b84SAmlan Barua if (bout) { 46057625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 46157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 46257625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 46357625b84SAmlan Barua } 46457625b84SAmlan Barua break; 46557625b84SAmlan Barua #endif 4664f7415efSAmlan Barua case 2: 4674f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4684f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4694f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4704f7415efSAmlan Barua if (fin) { 4714f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4724f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4734f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4744f7415efSAmlan Barua } 4754f7415efSAmlan Barua if (bout) { 4764f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4774f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4784f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4794f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 481*9496c9aeSAmlan Barua n1 = 2*local_n1*dim[0]; 48257625b84SAmlan Barua if (fout) { 48357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 484*9496c9aeSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48657625b84SAmlan Barua } 4874f7415efSAmlan Barua #else 4884f7415efSAmlan Barua /* Get local size */ 4894f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4904f7415efSAmlan Barua if (fin) { 4914f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4924f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4934f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4944f7415efSAmlan Barua } 4954f7415efSAmlan Barua if (fout) { 4964f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4974f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4984f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4994f7415efSAmlan Barua } 5004f7415efSAmlan Barua if (bout) { 5014f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5024f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5034f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5044f7415efSAmlan Barua } 5054f7415efSAmlan Barua #endif 5064f7415efSAmlan Barua break; 5074f7415efSAmlan Barua case 3: 5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5094f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5104f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5114f7415efSAmlan Barua if (fin) { 5124f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5134f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5144f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5154f7415efSAmlan Barua } 5164f7415efSAmlan Barua if (bout) { 5174f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5184f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5194f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5204f7415efSAmlan Barua } 521*9496c9aeSAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 52257625b84SAmlan Barua if (fout) { 52357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52457625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52657625b84SAmlan Barua } 5274f7415efSAmlan Barua #else 5280c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5290c9b18e4SAmlan Barua if (fin) { 5300c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5310c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5320c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5330c9b18e4SAmlan Barua } 5340c9b18e4SAmlan Barua if (fout) { 5350c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5360c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5370c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5380c9b18e4SAmlan Barua } 5390c9b18e4SAmlan Barua if (bout) { 5400c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5410c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5420c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5430c9b18e4SAmlan Barua } 5444f7415efSAmlan Barua #endif 5454f7415efSAmlan Barua break; 5464f7415efSAmlan Barua default: 5474f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5484f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5494f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5504f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5514f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5524f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5534f7415efSAmlan Barua 5544f7415efSAmlan Barua if (fin) { 5554f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5564f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5574f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5584f7415efSAmlan Barua } 5594f7415efSAmlan Barua if (bout) { 5604f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5614f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 562*9496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5634f7415efSAmlan Barua } 564*9496c9aeSAmlan Barua temp = fftw->partial_dim; 565*9496c9aeSAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 566*9496c9aeSAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 56757625b84SAmlan Barua if (fout) { 56857625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 56957625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 57157625b84SAmlan Barua } 5724f7415efSAmlan Barua #else 5730c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5740c9b18e4SAmlan Barua if (fin) { 5750c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5760c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5770c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5780c9b18e4SAmlan Barua } 5790c9b18e4SAmlan Barua if (fout) { 5800c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5810c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5820c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5830c9b18e4SAmlan Barua } 5840c9b18e4SAmlan Barua if (bout) { 5850c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5860c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5870c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5880c9b18e4SAmlan Barua } 5894f7415efSAmlan Barua #endif 5904f7415efSAmlan Barua break; 5914f7415efSAmlan Barua } 5924f7415efSAmlan Barua } 5934f7415efSAmlan Barua PetscFunctionReturn(0); 5944f7415efSAmlan Barua } 5954f7415efSAmlan Barua EXTERN_C_END 5964f7415efSAmlan Barua 597c92418ccSAmlan Barua #undef __FUNCT__ 598b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 599b2b77a04SHong Zhang /* 600b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 601b2b77a04SHong Zhang parallel layout determined by FFTW 602b2b77a04SHong Zhang 603b2b77a04SHong Zhang Collective on Mat 604b2b77a04SHong Zhang 605b2b77a04SHong Zhang Input Parameter: 606b2b77a04SHong Zhang . mat - the matrix 607b2b77a04SHong Zhang 608b2b77a04SHong Zhang Output Parameter: 609b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 610b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 611b2b77a04SHong Zhang 612b2b77a04SHong Zhang Level: advanced 613b2b77a04SHong Zhang 614b2b77a04SHong Zhang .seealso: MatCreateFFTW() 615b2b77a04SHong Zhang */ 616b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 617b2b77a04SHong Zhang { 618b2b77a04SHong Zhang PetscErrorCode ierr; 619b2b77a04SHong Zhang PetscMPIInt size,rank; 620b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 621b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 622c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 623*9496c9aeSAmlan Barua PetscInt N=fft->N; 624e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 625b2b77a04SHong Zhang 626b2b77a04SHong Zhang PetscFunctionBegin; 627b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 628b2b77a04SHong Zhang PetscValidType(A,1); 629b2b77a04SHong Zhang 630b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 632b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 633e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 634b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 635b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 636e5d7f247SAmlan Barua #else 637e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 638e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 639e81bb599SAmlan Barua #endif 640b2b77a04SHong Zhang } else { /* mpi case */ 641b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6426882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 643b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 644*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 64551d1eed7SAmlan Barua double *data_finr ; 646b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 647*9496c9aeSAmlan Barua PetscInt vsize,n1,N1; 648*9496c9aeSAmlan Barua #endif 649*9496c9aeSAmlan Barua 650c92418ccSAmlan Barua // PetscInt ctr; 651c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 652c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 653c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 65411902ff2SHong Zhang 655c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 656f76f76beSAmlan Barua // {k 657c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 658c92418ccSAmlan Barua // } 659b2b77a04SHong Zhang 660f76f76beSAmlan Barua 661f76f76beSAmlan Barua 662b2b77a04SHong Zhang switch (ndim){ 663b2b77a04SHong Zhang case 1: 6646882372aSHong Zhang /* Get local size */ 665e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 666c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6676882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 668*9496c9aeSAmlan Barua printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 6696882372aSHong Zhang if (fin) { 6706882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6716882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6726882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6736882372aSHong Zhang } 6746882372aSHong Zhang if (fout) { 6756882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 67657625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6776882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6786882372aSHong Zhang } 679b2b77a04SHong Zhang break; 680b2b77a04SHong Zhang case 2: 6813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6823c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6833c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6843c3a9c75SAmlan Barua if (fin) { 6853c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 68654dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6873c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 68809dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6893c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6903c3a9c75SAmlan Barua } 69157625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 69257625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 69357625b84SAmlan Barua printf("The values are %ld\n",local_n1); 6943c3a9c75SAmlan Barua if (fout) { 69509dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 69609dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6973c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6983c3a9c75SAmlan Barua } 699f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 7003c3a9c75SAmlan Barua 7013c3a9c75SAmlan Barua #else 702b2b77a04SHong Zhang /* Get local size */ 70364657d84SAmlan Barua //printf("Hope this does not come here"); 704b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 705b2b77a04SHong Zhang if (fin) { 706b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 707b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 708b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 709b2b77a04SHong Zhang } 710b2b77a04SHong Zhang if (fout) { 711b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 712b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 713b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 714b2b77a04SHong Zhang } 71564657d84SAmlan Barua //printf("Hope this does not come here"); 7163c3a9c75SAmlan Barua #endif 717b2b77a04SHong Zhang break; 718b2b77a04SHong Zhang case 3: 7196882372aSHong Zhang /* Get local size */ 7203c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 72151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 72251d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 72351d1eed7SAmlan Barua if (fin) { 72451d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 72551d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 72651d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 72751d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 72851d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 72951d1eed7SAmlan Barua } 73057625b84SAmlan Barua printf("The value is %ld",local_n1); 73157625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 73251d1eed7SAmlan Barua if (fout) { 73351d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 73451d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 73551d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 73651d1eed7SAmlan Barua } 73751d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 73851d1eed7SAmlan Barua 73951d1eed7SAmlan Barua 7403c3a9c75SAmlan Barua #else 7416882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 74211902ff2SHong Zhang // printf("The quantity n is %d",n); 7436882372aSHong Zhang if (fin) { 7446882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7456882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7466882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7476882372aSHong Zhang } 7486882372aSHong Zhang if (fout) { 7496882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7506882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7516882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7526882372aSHong Zhang } 7533c3a9c75SAmlan Barua #endif 754b2b77a04SHong Zhang break; 755b2b77a04SHong Zhang default: 7566882372aSHong Zhang /* Get local size */ 7573c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 758b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 759b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 760b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 761b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 762b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 763b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 764b3a17365SAmlan Barua if (fin) { 765b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 766b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 767b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 768b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 769b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 770b3a17365SAmlan Barua } 77157625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 77257625b84SAmlan Barua temp = fftw->partial_dim; 77357625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 77457625b84SAmlan Barua 77557625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 776b3a17365SAmlan Barua if (fout) { 777b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 77857625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 779b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 780b3a17365SAmlan Barua } 781b3a17365SAmlan Barua 7823c3a9c75SAmlan Barua #else 783c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 78411902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 78511902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 78611902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 78711902ff2SHong Zhang // for(i=0;i<ndim;i++) 78811902ff2SHong Zhang // { 78911902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 79011902ff2SHong Zhang // } 79111902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 79211902ff2SHong Zhang // printf("The quantity n is %d",n); 7936882372aSHong Zhang if (fin) { 7946882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7956882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7966882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7976882372aSHong Zhang } 7986882372aSHong Zhang if (fout) { 7996882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8006882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 8016882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 8026882372aSHong Zhang } 8033c3a9c75SAmlan Barua #endif 804b2b77a04SHong Zhang break; 805b2b77a04SHong Zhang } 806b2b77a04SHong Zhang } 80754dd5118SAmlan Barua // if (fin){ 80854dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 80954dd5118SAmlan Barua // } 81054dd5118SAmlan Barua // if (fout){ 81154dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 81254dd5118SAmlan Barua // } 813b2b77a04SHong Zhang PetscFunctionReturn(0); 814b2b77a04SHong Zhang } 815b2b77a04SHong Zhang 816b2b77a04SHong Zhang #undef __FUNCT__ 8173c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 8183c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 8193c3a9c75SAmlan Barua { 8203c3a9c75SAmlan Barua PetscErrorCode ierr; 8213c3a9c75SAmlan Barua PetscFunctionBegin; 8223c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8233c3a9c75SAmlan Barua PetscFunctionReturn(0); 8243c3a9c75SAmlan Barua } 82554efbe56SHong Zhang 826b2b77a04SHong Zhang /* 827*9496c9aeSAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 828*9496c9aeSAmlan Barua parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 829*9496c9aeSAmlan Barua changing dimension. 8303c3a9c75SAmlan Barua Input A, x, y 8313c3a9c75SAmlan Barua A - FFTW matrix 83254dd5118SAmlan Barua x - user data 833b2b77a04SHong Zhang Options Database Keys: 834b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 835b2b77a04SHong Zhang 836b2b77a04SHong Zhang Level: intermediate 837b2b77a04SHong Zhang 838b2b77a04SHong Zhang */ 8393c3a9c75SAmlan Barua 8403c3a9c75SAmlan Barua EXTERN_C_BEGIN 8413c3a9c75SAmlan Barua #undef __FUNCT__ 8423c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 8433c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8443c3a9c75SAmlan Barua { 8453c3a9c75SAmlan Barua PetscErrorCode ierr; 8463c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8473c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8483c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 849*9496c9aeSAmlan Barua PetscInt N=fft->N; 850b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 851*9496c9aeSAmlan Barua PetscInt low; 852*9496c9aeSAmlan Barua PetscInt rank,size; 8533c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 854*9496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8553c3a9c75SAmlan Barua VecScatter vecscat; 8563c3a9c75SAmlan Barua IS list1,list2; 857*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 858*9496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 859*9496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 860*9496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 861*9496c9aeSAmlan Barua ptrdiff_t temp; 862*9496c9aeSAmlan Barua #endif 8633c3a9c75SAmlan Barua 864f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 865f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8663c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 8673c3a9c75SAmlan Barua 868e81bb599SAmlan Barua if (size==1) 869e81bb599SAmlan Barua { 8707536937bSAmlan Barua /* switch (ndim){ 871e81bb599SAmlan Barua case 1: 872e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 873e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 874e81bb599SAmlan Barua { 875e81bb599SAmlan Barua indx1[i] = i; 876e81bb599SAmlan Barua } 877e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 878e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 879e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 882b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 883b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 884e81bb599SAmlan Barua break; 885e81bb599SAmlan Barua 886e81bb599SAmlan Barua case 2: 887e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 888e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 889e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 890e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 891e81bb599SAmlan Barua } 892e81bb599SAmlan Barua } 893e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 894e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 895e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 898b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 899b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 900e81bb599SAmlan Barua break; 901e81bb599SAmlan Barua case 3: 902e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 903e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 904e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 905e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 906e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 907e81bb599SAmlan Barua } 908e81bb599SAmlan Barua } 909e81bb599SAmlan Barua } 910e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 911e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 912e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 915b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 916b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 917e81bb599SAmlan Barua break; 918e81bb599SAmlan Barua default: 9197536937bSAmlan Barua */ 920*9496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 9216971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9226971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9236971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9246971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 925b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9267536937bSAmlan Barua // break; 9277536937bSAmlan Barua // } 928e81bb599SAmlan Barua } 929e81bb599SAmlan Barua 930e81bb599SAmlan Barua else{ 931e81bb599SAmlan Barua 9323c3a9c75SAmlan Barua switch (ndim){ 9333c3a9c75SAmlan Barua case 1: 93464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 93564657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 93664657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 93764657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 93864657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 93964657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94064657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 94164657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 94264657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 94364657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 94464657d84SAmlan Barua #else 945e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 94664657d84SAmlan Barua #endif 9473c3a9c75SAmlan Barua break; 9483c3a9c75SAmlan Barua case 2: 949bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 950bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 951bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 952bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 953bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 954bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 957bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 958bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 959bd59e6c6SAmlan Barua #else 9603c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9613c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 962*9496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 963*9496c9aeSAmlan Barua //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 964*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 965*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9663c3a9c75SAmlan Barua 9673c3a9c75SAmlan Barua if (dim[1]%2==0) 9683c3a9c75SAmlan Barua NM = dim[1]+2; 9693c3a9c75SAmlan Barua else 9703c3a9c75SAmlan Barua NM = dim[1]+1; 9713c3a9c75SAmlan Barua 9723c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9733c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9743c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9753c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9765b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9773c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9783c3a9c75SAmlan Barua } 9793c3a9c75SAmlan Barua } 9803c3a9c75SAmlan Barua 9813c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9823c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9833c3a9c75SAmlan Barua 984f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 985f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 988b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 989b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 990b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 991b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua #endif 993*9496c9aeSAmlan Barua break; 9943c3a9c75SAmlan Barua 9953c3a9c75SAmlan Barua case 3: 996bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 997bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 998bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 999bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1000bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1001bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1002bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1003bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1006bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1007bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1008bd59e6c6SAmlan Barua #else 100951d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 101051d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 101151d1eed7SAmlan Barua 1012*9496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1013*9496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1014*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1015*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 101651d1eed7SAmlan Barua 101751d1eed7SAmlan Barua if (dim[2]%2==0) 101851d1eed7SAmlan Barua NM = dim[2]+2; 101951d1eed7SAmlan Barua else 102051d1eed7SAmlan Barua NM = dim[2]+1; 102151d1eed7SAmlan Barua 102251d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 102351d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 102451d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 102551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 102651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 102751d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 102851d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 102951d1eed7SAmlan Barua } 103051d1eed7SAmlan Barua } 103151d1eed7SAmlan Barua } 103251d1eed7SAmlan Barua 103351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 103451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 103551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 103651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1039b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1040b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1041b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1042b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1043bd59e6c6SAmlan Barua #endif 1044*9496c9aeSAmlan Barua break; 10453c3a9c75SAmlan Barua 10463c3a9c75SAmlan Barua default: 1047bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1048bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1049bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1050bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1051bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1052bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057bd59e6c6SAmlan Barua #else 1058e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1059e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1060e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1061e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1062e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1063e5d7f247SAmlan Barua 1064e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1065e5d7f247SAmlan Barua 1066e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1067e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1068e5d7f247SAmlan Barua 1069e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1070ba6e06d0SAmlan Barua NM = 2; 1071e5d7f247SAmlan Barua else 1072ba6e06d0SAmlan Barua NM = 1; 1073e5d7f247SAmlan Barua 10746971673cSAmlan Barua j = low; 10756971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 10766971673cSAmlan Barua { 10776971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 10786971673cSAmlan Barua indx2[i] = j; 10796971673cSAmlan Barua if (k%dim[ndim-1]==0) 10806971673cSAmlan Barua { j+=NM;} 10816971673cSAmlan Barua j++; 10826971673cSAmlan Barua } 10836971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10846971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10856971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 10866971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10876971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10886971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1089b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1090b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1091b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1092b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1093bd59e6c6SAmlan Barua #endif 1094*9496c9aeSAmlan Barua break; 10953c3a9c75SAmlan Barua } 1096e81bb599SAmlan Barua } 10973c3a9c75SAmlan Barua 10983c3a9c75SAmlan Barua return 0; 10993c3a9c75SAmlan Barua } 11003c3a9c75SAmlan Barua EXTERN_C_END 11013c3a9c75SAmlan Barua 11023c3a9c75SAmlan Barua #undef __FUNCT__ 11033c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 11043c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 11053c3a9c75SAmlan Barua { 11063c3a9c75SAmlan Barua PetscErrorCode ierr; 11073c3a9c75SAmlan Barua PetscFunctionBegin; 11083c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 11093c3a9c75SAmlan Barua PetscFunctionReturn(0); 11103c3a9c75SAmlan Barua } 111154efbe56SHong Zhang 11125b3e41ffSAmlan Barua /* 11135b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 11145b3e41ffSAmlan Barua Input A, x, y 11155b3e41ffSAmlan Barua A - FFTW matrix 11165b3e41ffSAmlan Barua x - FFTW vector 11175b3e41ffSAmlan Barua y - PETSc vector that user can read 11185b3e41ffSAmlan Barua Options Database Keys: 11195b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11205b3e41ffSAmlan Barua 11215b3e41ffSAmlan Barua Level: intermediate 11225b3e41ffSAmlan Barua 11233c3a9c75SAmlan Barua */ 11243c3a9c75SAmlan Barua 11253c3a9c75SAmlan Barua EXTERN_C_BEGIN 11263c3a9c75SAmlan Barua #undef __FUNCT__ 11275b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 11285b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 11295b3e41ffSAmlan Barua { 11305b3e41ffSAmlan Barua PetscErrorCode ierr; 11315b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 11325b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11335b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1134*9496c9aeSAmlan Barua PetscInt N=fft->N; 1135b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1136*9496c9aeSAmlan Barua PetscInt low; 1137*9496c9aeSAmlan Barua PetscInt rank,size; 11385b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1139*9496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11405b3e41ffSAmlan Barua VecScatter vecscat; 11415b3e41ffSAmlan Barua IS list1,list2; 1142*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1143*9496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 1144*9496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 1145*9496c9aeSAmlan Barua PetscInt N1, n1 ,NM; 1146*9496c9aeSAmlan Barua ptrdiff_t temp; 1147*9496c9aeSAmlan Barua #endif 1148*9496c9aeSAmlan Barua 11495b3e41ffSAmlan Barua 11505b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11515b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1152cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 11535b3e41ffSAmlan Barua 1154e81bb599SAmlan Barua if (size==1){ 11557536937bSAmlan Barua /* 11565b3e41ffSAmlan Barua switch (ndim){ 11575b3e41ffSAmlan Barua case 1: 1158e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1159e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1160e81bb599SAmlan Barua { 1161e81bb599SAmlan Barua indx1[i] = i; 1162e81bb599SAmlan Barua } 1163e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1164e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1165e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1168b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1169b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1170e81bb599SAmlan Barua break; 1171e81bb599SAmlan Barua 1172e81bb599SAmlan Barua case 2: 1173e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1174e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1175e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1176e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1177e81bb599SAmlan Barua } 1178e81bb599SAmlan Barua } 1179e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1180e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1181e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1183e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1184b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1185b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1186e81bb599SAmlan Barua break; 1187e81bb599SAmlan Barua case 3: 1188e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1189e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1190e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1191e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1192e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1193e81bb599SAmlan Barua } 1194e81bb599SAmlan Barua } 1195e81bb599SAmlan Barua } 1196e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1197e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1198e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1201b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1202b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1203e81bb599SAmlan Barua break; 1204e81bb599SAmlan Barua default: 12057536937bSAmlan Barua */ 12066971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 12076971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 12086971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 12096971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12106971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12116971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1212b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 12137536937bSAmlan Barua // break; 12147536937bSAmlan Barua // } 1215e81bb599SAmlan Barua } 1216e81bb599SAmlan Barua else{ 1217e81bb599SAmlan Barua 1218e81bb599SAmlan Barua switch (ndim){ 1219e81bb599SAmlan Barua case 1: 122064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 122164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1222*9496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1223*9496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 122464657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 122564657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 122664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 122764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 123064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 123164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 123264657d84SAmlan Barua #else 12336a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 123464657d84SAmlan Barua #endif 12355b3e41ffSAmlan Barua break; 12365b3e41ffSAmlan Barua case 2: 1237bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1238bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1239bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1240bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1241bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1242bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1245bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1246bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1247bd59e6c6SAmlan Barua #else 12485b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12495b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12505b3e41ffSAmlan Barua 1251*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1252*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 12535b3e41ffSAmlan Barua 12545b3e41ffSAmlan Barua if (dim[1]%2==0) 12555b3e41ffSAmlan Barua NM = dim[1]+2; 12565b3e41ffSAmlan Barua else 12575b3e41ffSAmlan Barua NM = dim[1]+1; 12585b3e41ffSAmlan Barua 12595b3e41ffSAmlan Barua 12605b3e41ffSAmlan Barua 12615b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12625b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12635b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12645b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12655b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12665b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1267cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1268cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1269cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1270cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 12715b3e41ffSAmlan Barua } 12725b3e41ffSAmlan Barua } 12735b3e41ffSAmlan Barua 12745b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 12755b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 12765b3e41ffSAmlan Barua 12775b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 12785b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12795b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12805b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1281b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1282b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1283b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1284b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1285bd59e6c6SAmlan Barua #endif 1286*9496c9aeSAmlan Barua break; 12875b3e41ffSAmlan Barua case 3: 1288bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1289bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1290bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1291bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1292bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1293bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1294bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1295bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1296bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1297bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1298bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1299bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1300bd59e6c6SAmlan Barua #else 1301bd59e6c6SAmlan Barua 130251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 130351d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 130451d1eed7SAmlan Barua 1305*9496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1306*9496c9aeSAmlan Barua // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1307*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1308*9496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 130951d1eed7SAmlan Barua 131051d1eed7SAmlan Barua if (dim[2]%2==0) 131151d1eed7SAmlan Barua NM = dim[2]+2; 131251d1eed7SAmlan Barua else 131351d1eed7SAmlan Barua NM = dim[2]+1; 131451d1eed7SAmlan Barua 131551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 131651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 131751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 131851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 131951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 132051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 132151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 132251d1eed7SAmlan Barua } 132351d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 132451d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 132551d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 132651d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 132751d1eed7SAmlan Barua } 132851d1eed7SAmlan Barua } 132951d1eed7SAmlan Barua 133051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 133151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 133251d1eed7SAmlan Barua 133351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 133451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 133551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 133651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1337b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1338b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1339b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1340b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1341bd59e6c6SAmlan Barua #endif 1342*9496c9aeSAmlan Barua break; 13435b3e41ffSAmlan Barua default: 1344bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1345bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1346bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1347bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1348bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1349bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1350bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1351bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1352bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1353bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1354bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1355bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1356bd59e6c6SAmlan Barua #else 1357ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1358ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1359ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1360ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1361ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1362ba6e06d0SAmlan Barua 1363ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1364ba6e06d0SAmlan Barua 1365ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1366ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1367ba6e06d0SAmlan Barua 1368ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1369ba6e06d0SAmlan Barua NM = 2; 1370ba6e06d0SAmlan Barua else 1371ba6e06d0SAmlan Barua NM = 1; 1372ba6e06d0SAmlan Barua 1373ba6e06d0SAmlan Barua j = low; 1374ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1375ba6e06d0SAmlan Barua { 1376ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1377ba6e06d0SAmlan Barua indx2[i] = j; 1378ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1379ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1380ba6e06d0SAmlan Barua { j+=NM;} 1381ba6e06d0SAmlan Barua j++; 1382ba6e06d0SAmlan Barua } 1383ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1384ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1385ba6e06d0SAmlan Barua 1386ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1387ba6e06d0SAmlan Barua 1388ba6e06d0SAmlan Barua 1389ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1390ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1391ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1392ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1393b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1394b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1395b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1396b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1397bd59e6c6SAmlan Barua #endif 1398*9496c9aeSAmlan Barua break; 13995b3e41ffSAmlan Barua } 1400e81bb599SAmlan Barua } 14015b3e41ffSAmlan Barua return 0; 14025b3e41ffSAmlan Barua } 14035b3e41ffSAmlan Barua EXTERN_C_END 14045b3e41ffSAmlan Barua 14055b3e41ffSAmlan Barua EXTERN_C_BEGIN 14065b3e41ffSAmlan Barua #undef __FUNCT__ 14073c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 14083c3a9c75SAmlan Barua /* 14093c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 14103c3a9c75SAmlan Barua via the external package FFTW 14113c3a9c75SAmlan Barua Options Database Keys: 14123c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 14133c3a9c75SAmlan Barua 14143c3a9c75SAmlan Barua Level: intermediate 14153c3a9c75SAmlan Barua 14163c3a9c75SAmlan Barua */ 14173c3a9c75SAmlan Barua 1418b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1419b2b77a04SHong Zhang { 1420b2b77a04SHong Zhang PetscErrorCode ierr; 1421b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1422b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1423b2b77a04SHong Zhang Mat_FFTW *fftw; 1424b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1425b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1426b2b77a04SHong Zhang PetscBool flg; 1427*9496c9aeSAmlan Barua PetscInt p_flag,partial_dim=1,ctr,n1; 142811902ff2SHong Zhang PetscMPIInt size,rank; 1429*9496c9aeSAmlan Barua ptrdiff_t *pdim; 1430*9496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 1431*9496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1432*9496c9aeSAmlan Barua ptrdiff_t temp; 1433*9496c9aeSAmlan Barua PetscInt N1; 1434*9496c9aeSAmlan Barua #endif 1435*9496c9aeSAmlan Barua 1436b2b77a04SHong Zhang 1437b2b77a04SHong Zhang PetscFunctionBegin; 1438b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 143911902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 144054dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1441c92418ccSAmlan Barua 144211902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 144311902ff2SHong Zhang pdim[0] = dim[0]; 144411902ff2SHong Zhang for (ctr=1;ctr<ndim;ctr++) 144511902ff2SHong Zhang { 14466882372aSHong Zhang partial_dim *= dim[ctr]; 144711902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14486882372aSHong Zhang } 14493c3a9c75SAmlan Barua 1450b2b77a04SHong Zhang if (size == 1) { 1451e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1452b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1453b2b77a04SHong Zhang n = N; 1454e81bb599SAmlan Barua #else 1455*9496c9aeSAmlan Barua PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1456e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1457e81bb599SAmlan Barua n = tot_dim; 1458e81bb599SAmlan Barua #endif 1459e81bb599SAmlan Barua 1460b2b77a04SHong Zhang } else { 1461*9496c9aeSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1462b2b77a04SHong Zhang switch (ndim){ 1463b2b77a04SHong Zhang case 1: 14643c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14653c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1466e5d7f247SAmlan Barua #else 1467*9496c9aeSAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 14686882372aSHong Zhang n = (PetscInt)local_n0; 1469*9496c9aeSAmlan Barua n1 = (PetscInt) local_n1; 1470*9496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1471e5d7f247SAmlan Barua #endif 1472b2b77a04SHong Zhang break; 1473b2b77a04SHong Zhang case 2: 14745b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1475b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1476b2b77a04SHong Zhang /* 1477b2b77a04SHong Zhang PetscMPIInt rank; 1478b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1479b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1480b2b77a04SHong Zhang */ 1481b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1482b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 14835b3e41ffSAmlan Barua #else 14845b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 14855b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1486*9496c9aeSAmlan Barua n1 = 2*(PetscInt)local_n1*(dim[0]); 1487*9496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 14885b3e41ffSAmlan Barua #endif 1489b2b77a04SHong Zhang break; 1490b2b77a04SHong Zhang case 3: 149151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 149251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 14936882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 14946882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 149551d1eed7SAmlan Barua #else 149651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 149751d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1498*9496c9aeSAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1499*9496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 150051d1eed7SAmlan Barua #endif 1501b2b77a04SHong Zhang break; 1502b2b77a04SHong Zhang default: 1503b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 150411902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 15056882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 15066882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1507b3a17365SAmlan Barua #else 1508b3a17365SAmlan Barua temp = pdim[ndim-1]; 1509b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 1510*9496c9aeSAmlan Barua //printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1511b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1512b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1513b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1514b3a17365SAmlan Barua pdim[ndim-1] = temp; 1515*9496c9aeSAmlan Barua //printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1516*9496c9aeSAmlan Barua temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1517*9496c9aeSAmlan Barua n1 = 2*local_n1*temp; 1518*9496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1519b3a17365SAmlan Barua #endif 1520b2b77a04SHong Zhang break; 1521b2b77a04SHong Zhang } 1522b2b77a04SHong Zhang } 1523b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1524b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1525b2b77a04SHong Zhang fft->data = (void*)fftw; 1526b2b77a04SHong Zhang 1527b2b77a04SHong Zhang fft->n = n; 1528c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1529e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1530c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1531b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1532c92418ccSAmlan Barua 1533b2b77a04SHong Zhang fftw->p_forward = 0; 1534b2b77a04SHong Zhang fftw->p_backward = 0; 1535b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1536b2b77a04SHong Zhang 1537b2b77a04SHong Zhang if (size == 1){ 1538b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1539b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1540b2b77a04SHong Zhang } else { 1541b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1542b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1543b2b77a04SHong Zhang } 1544b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15456a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 15464be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1547b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 15484be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 15493c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15505b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1551b2b77a04SHong Zhang 1552b2b77a04SHong Zhang /* get runtime options */ 1553b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1554b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1555b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1556b2b77a04SHong Zhang PetscOptionsEnd(); 1557b2b77a04SHong Zhang PetscFunctionReturn(0); 1558b2b77a04SHong Zhang } 1559b2b77a04SHong Zhang EXTERN_C_END 15603c3a9c75SAmlan Barua 15613c3a9c75SAmlan Barua 15623c3a9c75SAmlan Barua 15633c3a9c75SAmlan Barua 1564