1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27*4be45526SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 43b2b77a04SHong Zhang switch (ndim){ 44b2b77a04SHong Zhang case 1: 4558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4758a912c5SAmlan Barua #else 4858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 4958a912c5SAmlan Barua #endif 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 5258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5458a912c5SAmlan Barua #else 5558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5658a912c5SAmlan Barua #endif 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang case 3: 5958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 60b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6158a912c5SAmlan Barua #else 6258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6358a912c5SAmlan Barua #endif 64b2b77a04SHong Zhang break; 65b2b77a04SHong Zhang default: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang fftw->finarray = x_array; 74b2b77a04SHong Zhang fftw->foutarray = y_array; 75b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 78b2b77a04SHong Zhang } else { /* use existing plan */ 79b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81b2b77a04SHong Zhang } else { 82b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 83b2b77a04SHong Zhang } 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang PetscFunctionReturn(0); 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang 90b2b77a04SHong Zhang #undef __FUNCT__ 91b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93b2b77a04SHong Zhang { 94b2b77a04SHong Zhang PetscErrorCode ierr; 95b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 96b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 98b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 99b2b77a04SHong Zhang 100b2b77a04SHong Zhang PetscFunctionBegin; 101b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 104b2b77a04SHong Zhang switch (ndim){ 105b2b77a04SHong Zhang case 1: 10658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10858a912c5SAmlan Barua #else 109e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11058a912c5SAmlan Barua #endif 111b2b77a04SHong Zhang break; 112b2b77a04SHong Zhang case 2: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 3: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang default: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang fftw->binarray = x_array; 135b2b77a04SHong Zhang fftw->boutarray = y_array; 136b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137b2b77a04SHong Zhang } else { /* use existing plan */ 138b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140b2b77a04SHong Zhang } else { 141b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142b2b77a04SHong Zhang } 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang PetscFunctionReturn(0); 147b2b77a04SHong Zhang } 148b2b77a04SHong Zhang 149b2b77a04SHong Zhang #undef __FUNCT__ 150b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 151b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152b2b77a04SHong Zhang { 153b2b77a04SHong Zhang PetscErrorCode ierr; 154b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 155b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 157c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 158b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim){ 165b2b77a04SHong Zhang case 1: 1663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168ae0a50aaSHong Zhang #else 169ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1703c3a9c75SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 1733c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1753c3a9c75SAmlan Barua #else 1763c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1773c3a9c75SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 1803c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1823c3a9c75SAmlan Barua #else 18351d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1843c3a9c75SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 1873c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1893c3a9c75SAmlan Barua #else 1903c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1913c3a9c75SAmlan Barua #endif 192b2b77a04SHong Zhang break; 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang fftw->finarray = x_array; 195b2b77a04SHong Zhang fftw->foutarray = y_array; 196b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 211b2b77a04SHong Zhang #undef __FUNCT__ 212b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214b2b77a04SHong Zhang { 215b2b77a04SHong Zhang PetscErrorCode ierr; 216b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 217b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 219c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 220b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 223b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 226b2b77a04SHong Zhang switch (ndim){ 227b2b77a04SHong Zhang case 1: 2283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 229b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230ae0a50aaSHong Zhang #else 231ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2323c3a9c75SAmlan Barua #endif 233b2b77a04SHong Zhang break; 234b2b77a04SHong Zhang case 2: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #else 2383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2393c3a9c75SAmlan Barua #endif 240b2b77a04SHong Zhang break; 241b2b77a04SHong Zhang case 3: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2443c3a9c75SAmlan Barua #else 2453c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang default: 2493c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 250c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang } 256b2b77a04SHong Zhang fftw->binarray = x_array; 257b2b77a04SHong Zhang fftw->boutarray = y_array; 258b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259b2b77a04SHong Zhang } else { /* use existing plan */ 260b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262b2b77a04SHong Zhang } else { 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang PetscFunctionReturn(0); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang #undef __FUNCT__ 272b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 273b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 276b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277b2b77a04SHong Zhang PetscErrorCode ierr; 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang PetscFunctionBegin; 280b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 282b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 287c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288b2b77a04SHong Zhang #undef __FUNCT__ 289b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 290b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291b2b77a04SHong Zhang { 292b2b77a04SHong Zhang PetscErrorCode ierr; 293b2b77a04SHong Zhang PetscScalar *array; 294b2b77a04SHong Zhang 295b2b77a04SHong Zhang PetscFunctionBegin; 296b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300b2b77a04SHong Zhang PetscFunctionReturn(0); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang #undef __FUNCT__ 304*4be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_1DC" 305c92418ccSAmlan Barua /* 306*4be45526SHong Zhang - Get Vectors(s) compatible with matrix, i.e. with the 307c92418ccSAmlan Barua parallel layout determined by FFTW-1D 308c92418ccSAmlan Barua 309c92418ccSAmlan Barua */ 310*4be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311c92418ccSAmlan Barua { 312c92418ccSAmlan Barua PetscErrorCode ierr; 313c92418ccSAmlan Barua PetscMPIInt size,rank; 314c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 315c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 316c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317c92418ccSAmlan Barua PetscInt N=fft->N; 318c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 319c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 321c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 323ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 324c92418ccSAmlan Barua 325c92418ccSAmlan Barua PetscFunctionBegin; 326c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 327c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328c92418ccSAmlan Barua #endif 329c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331c92418ccSAmlan Barua if (size == 1){ 332c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333c92418ccSAmlan Barua } 334c92418ccSAmlan Barua else { 335c92418ccSAmlan Barua if (ndim>1){ 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337c92418ccSAmlan Barua else { 338c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340c92418ccSAmlan Barua if (fin) { 341c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout) { 346c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349c92418ccSAmlan Barua } 350c92418ccSAmlan Barua if (bout) { 351c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354c92418ccSAmlan Barua } 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fin){ 357c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (fout){ 360c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bout){ 363c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364c92418ccSAmlan Barua } 365c92418ccSAmlan Barua PetscFunctionReturn(0); 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua } 3703c3a9c75SAmlan Barua 3714f7415efSAmlan Barua #undef __FUNCT__ 372*4be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 373*4be45526SHong Zhang /*@ 374*4be45526SHong Zhang MatGetVecFFTW - Get vector(s) compatible with the matrix, i.e. with the 3754f7415efSAmlan Barua parallel layout determined by FFTW 3764f7415efSAmlan Barua 3774f7415efSAmlan Barua Collective on Mat 3784f7415efSAmlan Barua 3794f7415efSAmlan Barua Input Parameter: 3804f7415efSAmlan Barua . mat - the matrix 3814f7415efSAmlan Barua 3824f7415efSAmlan Barua Output Parameter: 3834f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3844f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3854f7415efSAmlan Barua 3864f7415efSAmlan Barua Level: advanced 3874f7415efSAmlan Barua 3884f7415efSAmlan Barua .seealso: MatCreateFFTW() 389*4be45526SHong Zhang @*/ 390*4be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 391*4be45526SHong Zhang { 392*4be45526SHong Zhang PetscErrorCode ierr; 393*4be45526SHong Zhang PetscFunctionBegin; 394*4be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 395*4be45526SHong Zhang PetscFunctionReturn(0); 396*4be45526SHong Zhang } 397*4be45526SHong Zhang 3984f7415efSAmlan Barua EXTERN_C_BEGIN 399*4be45526SHong Zhang #undef __FUNCT__ 400*4be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 401*4be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4024f7415efSAmlan Barua { 4034f7415efSAmlan Barua PetscErrorCode ierr; 4044f7415efSAmlan Barua PetscMPIInt size,rank; 4054f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 4064f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4074f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4084f7415efSAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 4094f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 4104f7415efSAmlan Barua 4114f7415efSAmlan Barua PetscFunctionBegin; 4124f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4134f7415efSAmlan Barua PetscValidType(A,1); 4144f7415efSAmlan Barua 4154f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4164f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4174f7415efSAmlan Barua printf("size is %d\n",size); 4184f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4194f7415efSAmlan Barua printf("Routine is getting called\n"); 4204f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4214f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4224f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4234f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4244f7415efSAmlan Barua #else 4254f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 4264f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4274f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 4284f7415efSAmlan Barua #endif 4294f7415efSAmlan Barua } else { 4304f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4314f7415efSAmlan Barua ptrdiff_t local_n1,local_1_end; 4320c9b18e4SAmlan Barua fftw_complex *data_fin,*data_fout,*data_bout; 4334f7415efSAmlan Barua double *data_finr,*data_boutr ; 4344f7415efSAmlan Barua ptrdiff_t local_1_start,temp; 4354f7415efSAmlan Barua switch (ndim){ 4364f7415efSAmlan Barua case 1: 43757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 43857625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 43957625b84SAmlan Barua #else 4404f7415efSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 44157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44257625b84SAmlan Barua if (fin) { 44357625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44457625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 44557625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 44657625b84SAmlan Barua } 44757625b84SAmlan Barua if (fout) { 44857625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 44957625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45157625b84SAmlan Barua } 45257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45357625b84SAmlan Barua if (bout) { 45457625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 45557625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 45657625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 45757625b84SAmlan Barua } 45857625b84SAmlan Barua break; 45957625b84SAmlan Barua 46057625b84SAmlan Barua 46157625b84SAmlan Barua #endif 4624f7415efSAmlan Barua case 2: 4634f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4644f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4654f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4664f7415efSAmlan Barua if (fin) { 4674f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4684f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4694f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 4704f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4714f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4724f7415efSAmlan Barua } 4734f7415efSAmlan Barua if (bout) { 4744f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4754f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4764f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4774f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4784f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4794f7415efSAmlan Barua } 48057625b84SAmlan Barua if (fout) { 48157625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 48257625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48457625b84SAmlan Barua } 4854f7415efSAmlan Barua 4864f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 4874f7415efSAmlan Barua 4884f7415efSAmlan Barua #else 4894f7415efSAmlan Barua /* Get local size */ 4900c9b18e4SAmlan Barua printf("Code works for paralllel 2d complex DFT\n"); 4914f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4924f7415efSAmlan Barua if (fin) { 4934f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4944f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4954f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4964f7415efSAmlan Barua } 4974f7415efSAmlan Barua if (fout) { 4984f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4994f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5004f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5014f7415efSAmlan Barua } 5024f7415efSAmlan Barua if (bout) { 5034f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5044f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5054f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5064f7415efSAmlan Barua } 5074f7415efSAmlan Barua 5084f7415efSAmlan Barua //printf("Hope this does not come here"); 5094f7415efSAmlan Barua #endif 5104f7415efSAmlan Barua break; 5114f7415efSAmlan Barua 5124f7415efSAmlan Barua case 3: 5134f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5144f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5154f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5164f7415efSAmlan Barua if (fin) { 5174f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5184f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5194f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5204f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5214f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5224f7415efSAmlan Barua } 5234f7415efSAmlan Barua if (bout) { 5244f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5254f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5264f7415efSAmlan Barua // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5274f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5284f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5294f7415efSAmlan Barua } 53057625b84SAmlan Barua if (fout) { 53157625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 53257625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 53357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53457625b84SAmlan Barua } 5354f7415efSAmlan Barua 5364f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 5374f7415efSAmlan Barua 5384f7415efSAmlan Barua #else 5390c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5400c9b18e4SAmlan Barua // printf("The quantity n is %d",n); 5410c9b18e4SAmlan Barua if (fin) { 5420c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5430c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5440c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5450c9b18e4SAmlan Barua } 5460c9b18e4SAmlan Barua if (fout) { 5470c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5480c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5490c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5500c9b18e4SAmlan Barua } 5510c9b18e4SAmlan Barua if (bout) { 5520c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5530c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5540c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5550c9b18e4SAmlan Barua } 5560c9b18e4SAmlan Barua 5574f7415efSAmlan Barua #endif 5584f7415efSAmlan Barua break; 5594f7415efSAmlan Barua default: 5604f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5614f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5624f7415efSAmlan Barua printf("The value of temp is %ld\n",temp); 5634f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5644f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5654f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5664f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5674f7415efSAmlan Barua 5684f7415efSAmlan Barua if (fin) { 5694f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5704f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5714f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5724f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5734f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5744f7415efSAmlan Barua } 5754f7415efSAmlan Barua if (bout) { 5764f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5774f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5784f7415efSAmlan Barua //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5794f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5804f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5814f7415efSAmlan Barua } 58257625b84SAmlan Barua if (fout) { 58357625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 58457625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 58657625b84SAmlan Barua } 5874f7415efSAmlan Barua 5884f7415efSAmlan Barua #else 5890c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5900c9b18e4SAmlan Barua if (fin) { 5910c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5920c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5930c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5940c9b18e4SAmlan Barua } 5950c9b18e4SAmlan Barua if (fout) { 5960c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5970c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5980c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5990c9b18e4SAmlan Barua } 6000c9b18e4SAmlan Barua if (bout) { 6010c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6020c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6030c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6040c9b18e4SAmlan Barua } 6054f7415efSAmlan Barua 6060c9b18e4SAmlan Barua 6070c9b18e4SAmlan Barua 6084f7415efSAmlan Barua #endif 6094f7415efSAmlan Barua break; 6104f7415efSAmlan Barua } 6114f7415efSAmlan Barua } 6124f7415efSAmlan Barua PetscFunctionReturn(0); 6134f7415efSAmlan Barua } 6144f7415efSAmlan Barua EXTERN_C_END 6154f7415efSAmlan Barua 616c92418ccSAmlan Barua #undef __FUNCT__ 617b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 618b2b77a04SHong Zhang /* 619b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 620b2b77a04SHong Zhang parallel layout determined by FFTW 621b2b77a04SHong Zhang 622b2b77a04SHong Zhang Collective on Mat 623b2b77a04SHong Zhang 624b2b77a04SHong Zhang Input Parameter: 625b2b77a04SHong Zhang . mat - the matrix 626b2b77a04SHong Zhang 627b2b77a04SHong Zhang Output Parameter: 628b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 629b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 630b2b77a04SHong Zhang 631b2b77a04SHong Zhang Level: advanced 632b2b77a04SHong Zhang 633b2b77a04SHong Zhang .seealso: MatCreateFFTW() 634b2b77a04SHong Zhang */ 635b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 636b2b77a04SHong Zhang { 637b2b77a04SHong Zhang PetscErrorCode ierr; 638b2b77a04SHong Zhang PetscMPIInt size,rank; 639b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 640b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 641c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6423c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 643e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 644b2b77a04SHong Zhang 645b2b77a04SHong Zhang PetscFunctionBegin; 646b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 647b2b77a04SHong Zhang PetscValidType(A,1); 648b2b77a04SHong Zhang 649b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 650b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 651b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 652e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 653b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 654b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 655e5d7f247SAmlan Barua #else 656e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 657e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 658e81bb599SAmlan Barua #endif 659b2b77a04SHong Zhang } else { /* mpi case */ 660b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6616882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 662b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 66351d1eed7SAmlan Barua double *data_finr ; 664b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 665c92418ccSAmlan Barua // PetscInt ctr; 666c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 667c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 668c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 66911902ff2SHong Zhang 670c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 671f76f76beSAmlan Barua // {k 672c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 673c92418ccSAmlan Barua // } 674b2b77a04SHong Zhang 675f76f76beSAmlan Barua 676f76f76beSAmlan Barua 677b2b77a04SHong Zhang switch (ndim){ 678b2b77a04SHong Zhang case 1: 6796882372aSHong Zhang /* Get local size */ 680e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 681c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6826882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 68357625b84SAmlan Barua printf("The values of local_n0 and local_n1 are %d and %d",local_n0,local_n1); 6846882372aSHong Zhang if (fin) { 6856882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6866882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6876882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6886882372aSHong Zhang } 6896882372aSHong Zhang if (fout) { 6906882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 69157625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6926882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6936882372aSHong Zhang } 694b2b77a04SHong Zhang break; 695b2b77a04SHong Zhang case 2: 6963c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6973c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6983c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6993c3a9c75SAmlan Barua if (fin) { 7003c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 70154dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 7023c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 70309dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 7043c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7053c3a9c75SAmlan Barua } 70657625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 70757625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 70857625b84SAmlan Barua printf("The values are %ld\n",local_n1); 7093c3a9c75SAmlan Barua if (fout) { 71009dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 71109dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7123c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7133c3a9c75SAmlan Barua } 714f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 7153c3a9c75SAmlan Barua 7163c3a9c75SAmlan Barua #else 717b2b77a04SHong Zhang /* Get local size */ 71864657d84SAmlan Barua //printf("Hope this does not come here"); 719b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 720b2b77a04SHong Zhang if (fin) { 721b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 722b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 723b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 724b2b77a04SHong Zhang } 725b2b77a04SHong Zhang if (fout) { 726b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 727b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 728b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 729b2b77a04SHong Zhang } 73064657d84SAmlan Barua //printf("Hope this does not come here"); 7313c3a9c75SAmlan Barua #endif 732b2b77a04SHong Zhang break; 733b2b77a04SHong Zhang case 3: 7346882372aSHong Zhang /* Get local size */ 7353c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 73651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 73751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 73851d1eed7SAmlan Barua if (fin) { 73951d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 74051d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 74151d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 74251d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 74351d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 74451d1eed7SAmlan Barua } 74557625b84SAmlan Barua printf("The value is %ld",local_n1); 74657625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 74751d1eed7SAmlan Barua if (fout) { 74851d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 74951d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 75051d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 75151d1eed7SAmlan Barua } 75251d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 75351d1eed7SAmlan Barua 75451d1eed7SAmlan Barua 7553c3a9c75SAmlan Barua #else 7566882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 75711902ff2SHong Zhang // printf("The quantity n is %d",n); 7586882372aSHong Zhang if (fin) { 7596882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7606882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7616882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7626882372aSHong Zhang } 7636882372aSHong Zhang if (fout) { 7646882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7656882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7666882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7676882372aSHong Zhang } 7683c3a9c75SAmlan Barua #endif 769b2b77a04SHong Zhang break; 770b2b77a04SHong Zhang default: 7716882372aSHong Zhang /* Get local size */ 7723c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 773b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 774b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 775b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 776b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 777b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 778b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 779b3a17365SAmlan Barua if (fin) { 780b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 781b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 782b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 783b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 784b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 785b3a17365SAmlan Barua } 78657625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 78757625b84SAmlan Barua temp = fftw->partial_dim; 78857625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 78957625b84SAmlan Barua 79057625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 791b3a17365SAmlan Barua if (fout) { 792b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 79357625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 794b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 795b3a17365SAmlan Barua } 796b3a17365SAmlan Barua 7973c3a9c75SAmlan Barua #else 798c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 79911902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 80011902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 80111902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 80211902ff2SHong Zhang // for(i=0;i<ndim;i++) 80311902ff2SHong Zhang // { 80411902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 80511902ff2SHong Zhang // } 80611902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 80711902ff2SHong Zhang // printf("The quantity n is %d",n); 8086882372aSHong Zhang if (fin) { 8096882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8106882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 8116882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 8126882372aSHong Zhang } 8136882372aSHong Zhang if (fout) { 8146882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8156882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 8166882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 8176882372aSHong Zhang } 8183c3a9c75SAmlan Barua #endif 819b2b77a04SHong Zhang break; 820b2b77a04SHong Zhang } 821b2b77a04SHong Zhang } 82254dd5118SAmlan Barua // if (fin){ 82354dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 82454dd5118SAmlan Barua // } 82554dd5118SAmlan Barua // if (fout){ 82654dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 82754dd5118SAmlan Barua // } 828b2b77a04SHong Zhang PetscFunctionReturn(0); 829b2b77a04SHong Zhang } 830b2b77a04SHong Zhang 831b2b77a04SHong Zhang #undef __FUNCT__ 8323c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 8333c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 8343c3a9c75SAmlan Barua { 8353c3a9c75SAmlan Barua PetscErrorCode ierr; 8363c3a9c75SAmlan Barua PetscFunctionBegin; 8373c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8383c3a9c75SAmlan Barua PetscFunctionReturn(0); 8393c3a9c75SAmlan Barua } 84054efbe56SHong Zhang 841b2b77a04SHong Zhang /* 8423c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 8433c3a9c75SAmlan Barua Input A, x, y 8443c3a9c75SAmlan Barua A - FFTW matrix 84554dd5118SAmlan Barua x - user data 846b2b77a04SHong Zhang Options Database Keys: 847b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 848b2b77a04SHong Zhang 849b2b77a04SHong Zhang Level: intermediate 850b2b77a04SHong Zhang 851b2b77a04SHong Zhang */ 8523c3a9c75SAmlan Barua 8533c3a9c75SAmlan Barua EXTERN_C_BEGIN 8543c3a9c75SAmlan Barua #undef __FUNCT__ 8553c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 8563c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8573c3a9c75SAmlan Barua { 8583c3a9c75SAmlan Barua PetscErrorCode ierr; 8593c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8603c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8613c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8623c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 863b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 864b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 865e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 8663c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 867e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 8683c3a9c75SAmlan Barua VecScatter vecscat; 8693c3a9c75SAmlan Barua IS list1,list2; 8703c3a9c75SAmlan Barua 871f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 872f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8733c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 874f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 8753c3a9c75SAmlan Barua 876e81bb599SAmlan Barua if (size==1) 877e81bb599SAmlan Barua { 8787536937bSAmlan Barua /* switch (ndim){ 879e81bb599SAmlan Barua case 1: 880e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 881e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 882e81bb599SAmlan Barua { 883e81bb599SAmlan Barua indx1[i] = i; 884e81bb599SAmlan Barua } 885e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 886e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 887e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 890b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 891b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 892e81bb599SAmlan Barua break; 893e81bb599SAmlan Barua 894e81bb599SAmlan Barua case 2: 895e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 896e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 897e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 898e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 899e81bb599SAmlan Barua } 900e81bb599SAmlan Barua } 901e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 902e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 903e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 906b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 907b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 908e81bb599SAmlan Barua break; 909e81bb599SAmlan Barua case 3: 910e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 911e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 912e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 913e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 914e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 915e81bb599SAmlan Barua } 916e81bb599SAmlan Barua } 917e81bb599SAmlan Barua } 918e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 919e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 920e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 923b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 924b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 925e81bb599SAmlan Barua break; 926e81bb599SAmlan Barua default: 9277536937bSAmlan Barua */ 9286971673cSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 9296971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9306971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9316971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9326971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9346971673cSAmlan Barua //ierr = ISDestroy(list1);CHKERRQ(ierr); 9357536937bSAmlan Barua // break; 9367536937bSAmlan Barua // } 937e81bb599SAmlan Barua } 938e81bb599SAmlan Barua 939e81bb599SAmlan Barua else{ 940e81bb599SAmlan Barua 9413c3a9c75SAmlan Barua switch (ndim){ 9423c3a9c75SAmlan Barua case 1: 94364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 94464657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 94564657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 94664657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 94764657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 94864657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 94964657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 95064657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95164657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95264657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 95364657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 95464657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 95564657d84SAmlan Barua break; 95664657d84SAmlan Barua #else 957e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 95864657d84SAmlan Barua #endif 9593c3a9c75SAmlan Barua break; 9603c3a9c75SAmlan Barua case 2: 961bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 962bd59e6c6SAmlan Barua //PetscInt my_value; 963bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 964bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 965bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 966bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 967bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 968bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 969bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 971bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 972bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 973bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 974bd59e6c6SAmlan Barua break; 975bd59e6c6SAmlan Barua #else 9763c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9773c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9783c3a9c75SAmlan Barua 9795b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9805b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9815b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 9823c3a9c75SAmlan Barua 9833c3a9c75SAmlan Barua if (dim[1]%2==0) 9843c3a9c75SAmlan Barua NM = dim[1]+2; 9853c3a9c75SAmlan Barua else 9863c3a9c75SAmlan Barua NM = dim[1]+1; 9873c3a9c75SAmlan Barua 9883c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9893c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9903c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9913c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9925b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9933c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9945b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 9955b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 9965b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 9975b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 9983c3a9c75SAmlan Barua } 9993c3a9c75SAmlan Barua } 10003c3a9c75SAmlan Barua 10013c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10023c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10033c3a9c75SAmlan Barua 1004f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1005f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1008b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1009b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1010b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1011b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10123c3a9c75SAmlan Barua break; 1013bd59e6c6SAmlan Barua #endif 10143c3a9c75SAmlan Barua 10153c3a9c75SAmlan Barua case 3: 1016bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1017bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1018bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1019bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1020bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1021bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1022bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1023bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1026bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua break; 1029bd59e6c6SAmlan Barua #else 103051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103151d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 103251d1eed7SAmlan Barua 103351d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 103451d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 103551d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 103651d1eed7SAmlan Barua 103751d1eed7SAmlan Barua if (dim[2]%2==0) 103851d1eed7SAmlan Barua NM = dim[2]+2; 103951d1eed7SAmlan Barua else 104051d1eed7SAmlan Barua NM = dim[2]+1; 104151d1eed7SAmlan Barua 104251d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 104351d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 104451d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 104551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 104651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 104751d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 104851d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 104951d1eed7SAmlan Barua } 105051d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 105151d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 105251d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 105351d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 105451d1eed7SAmlan Barua } 105551d1eed7SAmlan Barua } 105651d1eed7SAmlan Barua 105751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 105851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 105951d1eed7SAmlan Barua 106051d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 106151d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106251d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106351d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1064b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1065b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1066b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1067b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10683c3a9c75SAmlan Barua break; 1069bd59e6c6SAmlan Barua #endif 10703c3a9c75SAmlan Barua 10713c3a9c75SAmlan Barua default: 1072bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1073bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1074bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1075bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1076bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1077bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1078bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1079bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1080bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1082bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1083bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1084bd59e6c6SAmlan Barua 1085bd59e6c6SAmlan Barua 1086bd59e6c6SAmlan Barua #else 1087e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1088e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 1089e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1090e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1091e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1092e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1093e5d7f247SAmlan Barua 1094e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1095e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1096e5d7f247SAmlan Barua 1097e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1098e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1099e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1100e5d7f247SAmlan Barua 1101e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1102ba6e06d0SAmlan Barua NM = 2; 1103e5d7f247SAmlan Barua else 1104ba6e06d0SAmlan Barua NM = 1; 1105e5d7f247SAmlan Barua 11066971673cSAmlan Barua j = low; 11076971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 11086971673cSAmlan Barua { 11096971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 11106971673cSAmlan Barua indx2[i] = j; 1111ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 11126971673cSAmlan Barua if (k%dim[ndim-1]==0) 11136971673cSAmlan Barua { j+=NM;} 11146971673cSAmlan Barua j++; 11156971673cSAmlan Barua } 11166971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 11176971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 11186971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 11196971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11206971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11216971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1122b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1123b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1124b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1125b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 11263c3a9c75SAmlan Barua break; 1127bd59e6c6SAmlan Barua #endif 11283c3a9c75SAmlan Barua } 1129e81bb599SAmlan Barua } 11303c3a9c75SAmlan Barua 11313c3a9c75SAmlan Barua return 0; 11323c3a9c75SAmlan Barua } 11333c3a9c75SAmlan Barua EXTERN_C_END 11343c3a9c75SAmlan Barua 11353c3a9c75SAmlan Barua #undef __FUNCT__ 11363c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 11373c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 11383c3a9c75SAmlan Barua { 11393c3a9c75SAmlan Barua PetscErrorCode ierr; 11403c3a9c75SAmlan Barua PetscFunctionBegin; 11413c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 11423c3a9c75SAmlan Barua PetscFunctionReturn(0); 11433c3a9c75SAmlan Barua } 114454efbe56SHong Zhang 11455b3e41ffSAmlan Barua /* 11465b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 11475b3e41ffSAmlan Barua Input A, x, y 11485b3e41ffSAmlan Barua A - FFTW matrix 11495b3e41ffSAmlan Barua x - FFTW vector 11505b3e41ffSAmlan Barua y - PETSc vector that user can read 11515b3e41ffSAmlan Barua Options Database Keys: 11525b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11535b3e41ffSAmlan Barua 11545b3e41ffSAmlan Barua Level: intermediate 11555b3e41ffSAmlan Barua 11563c3a9c75SAmlan Barua */ 11573c3a9c75SAmlan Barua 11583c3a9c75SAmlan Barua EXTERN_C_BEGIN 11593c3a9c75SAmlan Barua #undef __FUNCT__ 11605b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 11615b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 11625b3e41ffSAmlan Barua { 11635b3e41ffSAmlan Barua PetscErrorCode ierr; 11645b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 11655b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11665b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 11675b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 1168b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1169b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 1170ba6e06d0SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 11715b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1172ba6e06d0SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 11735b3e41ffSAmlan Barua VecScatter vecscat; 11745b3e41ffSAmlan Barua IS list1,list2; 11755b3e41ffSAmlan Barua 11765b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11775b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1178cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 11795b3e41ffSAmlan Barua 1180e81bb599SAmlan Barua if (size==1){ 11817536937bSAmlan Barua /* 11825b3e41ffSAmlan Barua switch (ndim){ 11835b3e41ffSAmlan Barua case 1: 1184e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1185e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1186e81bb599SAmlan Barua { 1187e81bb599SAmlan Barua indx1[i] = i; 1188e81bb599SAmlan Barua } 1189e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1190e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1191e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1192e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1193e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1194b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1195b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1196e81bb599SAmlan Barua break; 1197e81bb599SAmlan Barua 1198e81bb599SAmlan Barua case 2: 1199e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1200e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1201e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1202e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1203e81bb599SAmlan Barua } 1204e81bb599SAmlan Barua } 1205e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1206e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1207e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1208e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1209e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1210b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1211b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1212e81bb599SAmlan Barua break; 1213e81bb599SAmlan Barua case 3: 1214e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1215e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1216e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1217e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1218e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1219e81bb599SAmlan Barua } 1220e81bb599SAmlan Barua } 1221e81bb599SAmlan Barua } 1222e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1223e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1224e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1227b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1228b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1229e81bb599SAmlan Barua break; 1230e81bb599SAmlan Barua default: 12317536937bSAmlan Barua */ 12326971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 12336971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 12346971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 12356971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12366971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12376971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1238b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 12397536937bSAmlan Barua // break; 12407536937bSAmlan Barua // } 1241e81bb599SAmlan Barua } 1242e81bb599SAmlan Barua else{ 1243e81bb599SAmlan Barua 1244e81bb599SAmlan Barua switch (ndim){ 1245e81bb599SAmlan Barua case 1: 124664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 124764657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 124864657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 124964657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 125064657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 125164657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 125264657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 125364657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125464657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125564657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 125664657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 125764657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 125864657d84SAmlan Barua break; 125964657d84SAmlan Barua 126064657d84SAmlan Barua #else 12616a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 126264657d84SAmlan Barua #endif 12635b3e41ffSAmlan Barua break; 12645b3e41ffSAmlan Barua case 2: 1265bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1266bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1267bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1268bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1269bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1270bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1273bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1274bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1275bd59e6c6SAmlan Barua break; 1276bd59e6c6SAmlan Barua #else 12775b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12785b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12795b3e41ffSAmlan Barua 12805b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12815b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12825b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 12835b3e41ffSAmlan Barua 12845b3e41ffSAmlan Barua if (dim[1]%2==0) 12855b3e41ffSAmlan Barua NM = dim[1]+2; 12865b3e41ffSAmlan Barua else 12875b3e41ffSAmlan Barua NM = dim[1]+1; 12885b3e41ffSAmlan Barua 12895b3e41ffSAmlan Barua 12905b3e41ffSAmlan Barua 12915b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12925b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12935b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12945b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12955b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12965b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1297cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1298cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1299cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1300cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 13015b3e41ffSAmlan Barua } 13025b3e41ffSAmlan Barua } 13035b3e41ffSAmlan Barua 13045b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 13055b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 13065b3e41ffSAmlan Barua 13075b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 13085b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 13095b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 13105b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1311b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1312b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1313b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1314b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 13155b3e41ffSAmlan Barua break; 1316bd59e6c6SAmlan Barua #endif 13175b3e41ffSAmlan Barua case 3: 1318bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1319bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1320bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1321bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1322bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1323bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1324bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1325bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1328bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1329bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1330bd59e6c6SAmlan Barua break; 1331bd59e6c6SAmlan Barua #else 1332bd59e6c6SAmlan Barua 133351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 133451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 133551d1eed7SAmlan Barua 133651d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 133751d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 133851d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 133951d1eed7SAmlan Barua 134051d1eed7SAmlan Barua if (dim[2]%2==0) 134151d1eed7SAmlan Barua NM = dim[2]+2; 134251d1eed7SAmlan Barua else 134351d1eed7SAmlan Barua NM = dim[2]+1; 134451d1eed7SAmlan Barua 134551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 134651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 134751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 134851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 134951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 135051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 135151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 135251d1eed7SAmlan Barua } 135351d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 135451d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 135551d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 135651d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 135751d1eed7SAmlan Barua } 135851d1eed7SAmlan Barua } 135951d1eed7SAmlan Barua 136051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 136151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 136251d1eed7SAmlan Barua 136351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 136451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 136551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 136651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1367b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1368b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1369b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1370b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 13715b3e41ffSAmlan Barua break; 1372bd59e6c6SAmlan Barua #endif 13735b3e41ffSAmlan Barua default: 1374bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1375bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1376bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1377bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1378bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1379bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1380bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1381bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1382bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1383bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1384bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1385bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1386bd59e6c6SAmlan Barua #else 1387ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1388ba6e06d0SAmlan Barua printf("The value of temp is %ld\n",temp); 1389ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1390ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1391ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1392ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1393ba6e06d0SAmlan Barua 1394ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1395ba6e06d0SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1396ba6e06d0SAmlan Barua 1397ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1398ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1399ba6e06d0SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1400ba6e06d0SAmlan Barua 1401ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1402ba6e06d0SAmlan Barua NM = 2; 1403ba6e06d0SAmlan Barua else 1404ba6e06d0SAmlan Barua NM = 1; 1405ba6e06d0SAmlan Barua 1406ba6e06d0SAmlan Barua j = low; 1407ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1408ba6e06d0SAmlan Barua { 1409ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1410ba6e06d0SAmlan Barua indx2[i] = j; 1411ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1412ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1413ba6e06d0SAmlan Barua { j+=NM;} 1414ba6e06d0SAmlan Barua j++; 1415ba6e06d0SAmlan Barua } 1416ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1417ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1418ba6e06d0SAmlan Barua 1419ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1420ba6e06d0SAmlan Barua 1421ba6e06d0SAmlan Barua 1422ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1423ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1424ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1425ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1426b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1427b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1428b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1429b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1430ba6e06d0SAmlan Barua 14315b3e41ffSAmlan Barua break; 1432bd59e6c6SAmlan Barua #endif 14335b3e41ffSAmlan Barua } 1434e81bb599SAmlan Barua } 14355b3e41ffSAmlan Barua return 0; 14365b3e41ffSAmlan Barua } 14375b3e41ffSAmlan Barua EXTERN_C_END 14385b3e41ffSAmlan Barua 14395b3e41ffSAmlan Barua EXTERN_C_BEGIN 14405b3e41ffSAmlan Barua #undef __FUNCT__ 14413c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 14423c3a9c75SAmlan Barua /* 14433c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 14443c3a9c75SAmlan Barua via the external package FFTW 14453c3a9c75SAmlan Barua Options Database Keys: 14463c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 14473c3a9c75SAmlan Barua 14483c3a9c75SAmlan Barua Level: intermediate 14493c3a9c75SAmlan Barua 14503c3a9c75SAmlan Barua */ 14513c3a9c75SAmlan Barua 1452b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1453b2b77a04SHong Zhang { 1454b2b77a04SHong Zhang PetscErrorCode ierr; 1455b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1456b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1457b2b77a04SHong Zhang Mat_FFTW *fftw; 1458b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1459b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1460b2b77a04SHong Zhang PetscBool flg; 1461b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 146211902ff2SHong Zhang PetscMPIInt size,rank; 1463b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 14644a16a297SSean Farley ptrdiff_t local_n1,local_1_start,local_1_end; 1465b2b77a04SHong Zhang 1466b2b77a04SHong Zhang PetscFunctionBegin; 1467b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 146811902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 146954dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1470c92418ccSAmlan Barua 147111902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 147211902ff2SHong Zhang pdim[0] = dim[0]; 147311902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 147411902ff2SHong Zhang { 14756882372aSHong Zhang partial_dim *= dim[ctr]; 147611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14776882372aSHong Zhang } 14783c3a9c75SAmlan Barua 1479b2b77a04SHong Zhang if (size == 1) { 1480e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1481b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1482b2b77a04SHong Zhang n = N; 1483e81bb599SAmlan Barua #else 1484e81bb599SAmlan Barua int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1485e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1486e81bb599SAmlan Barua n = tot_dim; 1487e81bb599SAmlan Barua #endif 1488e81bb599SAmlan Barua 1489b2b77a04SHong Zhang } else { 1490b223cc97SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1491b2b77a04SHong Zhang switch (ndim){ 1492b2b77a04SHong Zhang case 1: 14933c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14943c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1495e5d7f247SAmlan Barua #else 14966882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 14976882372aSHong Zhang n = (PetscInt)local_n0; 1498798b254bSAmlan Barua printf("The value of n is %d",n); 14996882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1500e5d7f247SAmlan Barua #endif 15013c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1502b2b77a04SHong Zhang break; 1503b2b77a04SHong Zhang case 2: 15045b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1505b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1506b2b77a04SHong Zhang /* 1507b2b77a04SHong Zhang PetscMPIInt rank; 1508b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1509b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1510b2b77a04SHong Zhang */ 1511b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1512b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 15135b3e41ffSAmlan Barua #else 15145b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 15155b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 15165b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 15175b3e41ffSAmlan Barua #endif 1518b2b77a04SHong Zhang break; 1519b2b77a04SHong Zhang case 3: 152011902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 152151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 152251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 15236882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 15246882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 152551d1eed7SAmlan Barua #else 152651d1eed7SAmlan Barua printf("The code comes here\n"); 152751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 152851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 152951d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 153051d1eed7SAmlan Barua #endif 1531b2b77a04SHong Zhang break; 1532b2b77a04SHong Zhang default: 1533b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 153411902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1535c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 153611902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 15376882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 153811902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 15396882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1540b3a17365SAmlan Barua #else 1541b3a17365SAmlan Barua temp = pdim[ndim-1]; 1542b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 1543b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1544b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1545b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1546b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1547b3a17365SAmlan Barua pdim[ndim-1] = temp; 1548b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1549b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1550b3a17365SAmlan Barua #endif 1551b2b77a04SHong Zhang break; 1552b2b77a04SHong Zhang } 1553b2b77a04SHong Zhang } 1554b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1555b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1556b2b77a04SHong Zhang fft->data = (void*)fftw; 1557b2b77a04SHong Zhang 1558b2b77a04SHong Zhang fft->n = n; 1559c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1560e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1561c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1562b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1563c92418ccSAmlan Barua 1564b2b77a04SHong Zhang fftw->p_forward = 0; 1565b2b77a04SHong Zhang fftw->p_backward = 0; 1566b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1567b2b77a04SHong Zhang 1568b2b77a04SHong Zhang if (size == 1){ 1569b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1570b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1571b2b77a04SHong Zhang } else { 1572b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1573b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1574b2b77a04SHong Zhang } 1575b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15766a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1577*4be45526SHong Zhang //A->ops->getvecs = MatGetVecs_FFTW; 1578b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 1579*4be45526SHong Zhang PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 15803c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15815b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1582b2b77a04SHong Zhang 1583b2b77a04SHong Zhang /* get runtime options */ 1584b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1585b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1586b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1587b2b77a04SHong Zhang PetscOptionsEnd(); 1588b2b77a04SHong Zhang PetscFunctionReturn(0); 1589b2b77a04SHong Zhang } 1590b2b77a04SHong Zhang EXTERN_C_END 15913c3a9c75SAmlan Barua 15923c3a9c75SAmlan Barua 15933c3a9c75SAmlan Barua 15943c3a9c75SAmlan Barua 1595