1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 284f7415efSAmlan Barua extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*); 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang #undef __FUNCT__ 31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33b2b77a04SHong Zhang { 34b2b77a04SHong Zhang PetscErrorCode ierr; 35b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 36b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 38b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 39b2b77a04SHong Zhang 40b2b77a04SHong Zhang PetscFunctionBegin; 41b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 44b2b77a04SHong Zhang switch (ndim){ 45b2b77a04SHong Zhang case 1: 4658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 47b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4858a912c5SAmlan Barua #else 4958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5058a912c5SAmlan Barua #endif 51b2b77a04SHong Zhang break; 52b2b77a04SHong Zhang case 2: 5358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5558a912c5SAmlan Barua #else 5658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5758a912c5SAmlan Barua #endif 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang case 3: 6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 61b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6258a912c5SAmlan Barua #else 6358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6458a912c5SAmlan Barua #endif 65b2b77a04SHong Zhang break; 66b2b77a04SHong Zhang default: 6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 68b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6958a912c5SAmlan Barua #else 7058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7158a912c5SAmlan Barua #endif 72b2b77a04SHong Zhang break; 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang fftw->finarray = x_array; 75b2b77a04SHong Zhang fftw->foutarray = y_array; 76b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 77b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 78b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 79b2b77a04SHong Zhang } else { /* use existing plan */ 80b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 81b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 82b2b77a04SHong Zhang } else { 83b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang } 86b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 88b2b77a04SHong Zhang PetscFunctionReturn(0); 89b2b77a04SHong Zhang } 90b2b77a04SHong Zhang 91b2b77a04SHong Zhang #undef __FUNCT__ 92b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 93b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 94b2b77a04SHong Zhang { 95b2b77a04SHong Zhang PetscErrorCode ierr; 96b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 97b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 98b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 99b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 100b2b77a04SHong Zhang 101b2b77a04SHong Zhang PetscFunctionBegin; 102b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 104b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 105b2b77a04SHong Zhang switch (ndim){ 106b2b77a04SHong Zhang case 1: 10758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 108b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10958a912c5SAmlan Barua #else 110e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11158a912c5SAmlan Barua #endif 112b2b77a04SHong Zhang break; 113b2b77a04SHong Zhang case 2: 11458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 115b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11658a912c5SAmlan Barua #else 117e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11858a912c5SAmlan Barua #endif 119b2b77a04SHong Zhang break; 120b2b77a04SHong Zhang case 3: 12158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 122b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12358a912c5SAmlan Barua #else 124e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12558a912c5SAmlan Barua #endif 126b2b77a04SHong Zhang break; 127b2b77a04SHong Zhang default: 12858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 129b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13058a912c5SAmlan Barua #else 131e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13258a912c5SAmlan Barua #endif 133b2b77a04SHong Zhang break; 134b2b77a04SHong Zhang } 135b2b77a04SHong Zhang fftw->binarray = x_array; 136b2b77a04SHong Zhang fftw->boutarray = y_array; 137b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 138b2b77a04SHong Zhang } else { /* use existing plan */ 139b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 140b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 141b2b77a04SHong Zhang } else { 142b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang } 145b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 147b2b77a04SHong Zhang PetscFunctionReturn(0); 148b2b77a04SHong Zhang } 149b2b77a04SHong Zhang 150b2b77a04SHong Zhang #undef __FUNCT__ 151b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 152b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 153b2b77a04SHong Zhang { 154b2b77a04SHong Zhang PetscErrorCode ierr; 155b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 156b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 158c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 159b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 160b2b77a04SHong Zhang 161b2b77a04SHong Zhang PetscFunctionBegin; 162b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 165b2b77a04SHong Zhang switch (ndim){ 166b2b77a04SHong Zhang case 1: 1673c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 168b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169ae0a50aaSHong Zhang #else 170ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1713c3a9c75SAmlan Barua #endif 172b2b77a04SHong Zhang break; 173b2b77a04SHong Zhang case 2: 1743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 175b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1763c3a9c75SAmlan Barua #else 1773c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1783c3a9c75SAmlan Barua #endif 179b2b77a04SHong Zhang break; 180b2b77a04SHong Zhang case 3: 1813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 182b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1833c3a9c75SAmlan Barua #else 18451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1853c3a9c75SAmlan Barua #endif 186b2b77a04SHong Zhang break; 187b2b77a04SHong Zhang default: 1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 189c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1903c3a9c75SAmlan Barua #else 1913c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1923c3a9c75SAmlan Barua #endif 193b2b77a04SHong Zhang break; 194b2b77a04SHong Zhang } 195b2b77a04SHong Zhang fftw->finarray = x_array; 196b2b77a04SHong Zhang fftw->foutarray = y_array; 197b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 198b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 199b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 200b2b77a04SHong Zhang } else { /* use existing plan */ 201b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 202b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 203b2b77a04SHong Zhang } else { 204b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang } 207b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 209b2b77a04SHong Zhang PetscFunctionReturn(0); 210b2b77a04SHong Zhang } 211b2b77a04SHong Zhang 212b2b77a04SHong Zhang #undef __FUNCT__ 213b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 214b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 215b2b77a04SHong Zhang { 216b2b77a04SHong Zhang PetscErrorCode ierr; 217b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 218b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 219b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 220c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 221b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 222b2b77a04SHong Zhang 223b2b77a04SHong Zhang PetscFunctionBegin; 224b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 226b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 227b2b77a04SHong Zhang switch (ndim){ 228b2b77a04SHong Zhang case 1: 2293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 230b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 231ae0a50aaSHong Zhang #else 232ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2333c3a9c75SAmlan Barua #endif 234b2b77a04SHong Zhang break; 235b2b77a04SHong Zhang case 2: 2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 237b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2383c3a9c75SAmlan Barua #else 2393c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2403c3a9c75SAmlan Barua #endif 241b2b77a04SHong Zhang break; 242b2b77a04SHong Zhang case 3: 2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 244b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2453c3a9c75SAmlan Barua #else 2463c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2473c3a9c75SAmlan Barua #endif 248b2b77a04SHong Zhang break; 249b2b77a04SHong Zhang default: 2503c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 251c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2523c3a9c75SAmlan Barua #else 2533c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2543c3a9c75SAmlan Barua #endif 255b2b77a04SHong Zhang break; 256b2b77a04SHong Zhang } 257b2b77a04SHong Zhang fftw->binarray = x_array; 258b2b77a04SHong Zhang fftw->boutarray = y_array; 259b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 260b2b77a04SHong Zhang } else { /* use existing plan */ 261b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 262b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 263b2b77a04SHong Zhang } else { 264b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang } 267b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 269b2b77a04SHong Zhang PetscFunctionReturn(0); 270b2b77a04SHong Zhang } 271b2b77a04SHong Zhang 272b2b77a04SHong Zhang #undef __FUNCT__ 273b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 274b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 275b2b77a04SHong Zhang { 276b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 277b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 278b2b77a04SHong Zhang PetscErrorCode ierr; 279b2b77a04SHong Zhang 280b2b77a04SHong Zhang PetscFunctionBegin; 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 282b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 283b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 284bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 285b2b77a04SHong Zhang PetscFunctionReturn(0); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang 288c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 289b2b77a04SHong Zhang #undef __FUNCT__ 290b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 291b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 292b2b77a04SHong Zhang { 293b2b77a04SHong Zhang PetscErrorCode ierr; 294b2b77a04SHong Zhang PetscScalar *array; 295b2b77a04SHong Zhang 296b2b77a04SHong Zhang PetscFunctionBegin; 297b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 298b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 300b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 301b2b77a04SHong Zhang PetscFunctionReturn(0); 302b2b77a04SHong Zhang } 303b2b77a04SHong Zhang 304b2b77a04SHong Zhang #undef __FUNCT__ 3053c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 306c92418ccSAmlan Barua /* 307c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 308c92418ccSAmlan Barua parallel layout determined by FFTW-1D 309c92418ccSAmlan Barua 310c92418ccSAmlan Barua */ 3116a506ed8SAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 312c92418ccSAmlan Barua { 313c92418ccSAmlan Barua PetscErrorCode ierr; 314c92418ccSAmlan Barua PetscMPIInt size,rank; 315c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 316c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 317c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318c92418ccSAmlan Barua PetscInt N=fft->N; 319c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 320c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 321c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 322c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 323c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 324ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 325c92418ccSAmlan Barua 326c92418ccSAmlan Barua PetscFunctionBegin; 327c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 328c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 329c92418ccSAmlan Barua #endif 330c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 331c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 332c92418ccSAmlan Barua if (size == 1){ 333c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 334c92418ccSAmlan Barua } 335c92418ccSAmlan Barua else { 336c92418ccSAmlan Barua if (ndim>1){ 337c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 338c92418ccSAmlan Barua else { 339c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 340c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 341c92418ccSAmlan Barua if (fin) { 342c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 343c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 344c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 345c92418ccSAmlan Barua } 346c92418ccSAmlan Barua if (fout) { 347c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 348c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 349c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 350c92418ccSAmlan Barua } 351c92418ccSAmlan Barua if (bout) { 352c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 353c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 354c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua } 357c92418ccSAmlan Barua if (fin){ 358c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 359c92418ccSAmlan Barua } 360c92418ccSAmlan Barua if (fout){ 361c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 362c92418ccSAmlan Barua } 363c92418ccSAmlan Barua if (bout){ 364c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 365c92418ccSAmlan Barua } 366c92418ccSAmlan Barua PetscFunctionReturn(0); 367c92418ccSAmlan Barua } 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua 370c92418ccSAmlan Barua } 3713c3a9c75SAmlan Barua 3724f7415efSAmlan Barua 3734f7415efSAmlan Barua #undef __FUNCT__ 3744f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT" 3754f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z) 3764f7415efSAmlan Barua { 3774f7415efSAmlan Barua PetscErrorCode ierr; 3784f7415efSAmlan Barua PetscFunctionBegin; 3794f7415efSAmlan Barua ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3804f7415efSAmlan Barua PetscFunctionReturn(0); 3814f7415efSAmlan Barua } 3824f7415efSAmlan Barua 3834f7415efSAmlan Barua #undef __FUNCT__ 3844f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT_FFTW" 3854f7415efSAmlan Barua /* 3864f7415efSAmlan Barua MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 3874f7415efSAmlan Barua parallel layout determined by FFTW 3884f7415efSAmlan Barua 3894f7415efSAmlan Barua Collective on Mat 3904f7415efSAmlan Barua 3914f7415efSAmlan Barua Input Parameter: 3924f7415efSAmlan Barua . mat - the matrix 3934f7415efSAmlan Barua 3944f7415efSAmlan Barua Output Parameter: 3954f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 3964f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 3974f7415efSAmlan Barua 3984f7415efSAmlan Barua Level: advanced 3994f7415efSAmlan Barua 4004f7415efSAmlan Barua .seealso: MatCreateFFTW() 4014f7415efSAmlan Barua */ 4024f7415efSAmlan Barua EXTERN_C_BEGIN 4034f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4044f7415efSAmlan Barua { 4054f7415efSAmlan Barua PetscErrorCode ierr; 4064f7415efSAmlan Barua PetscMPIInt size,rank; 4074f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 4084f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4094f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4104f7415efSAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 4114f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 4124f7415efSAmlan Barua 4134f7415efSAmlan Barua PetscFunctionBegin; 4144f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4154f7415efSAmlan Barua PetscValidType(A,1); 4164f7415efSAmlan Barua 4174f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 4184f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 4194f7415efSAmlan Barua printf("size is %d\n",size); 4204f7415efSAmlan Barua if (size == 1){ /* sequential case */ 4214f7415efSAmlan Barua printf("Routine is getting called\n"); 4224f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4234f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4244f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4254f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4264f7415efSAmlan Barua #else 4274f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 4284f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4294f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 4304f7415efSAmlan Barua #endif 4314f7415efSAmlan Barua } else { 4324f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4334f7415efSAmlan Barua ptrdiff_t local_n1,local_1_end; 4340c9b18e4SAmlan Barua fftw_complex *data_fin,*data_fout,*data_bout; 4354f7415efSAmlan Barua double *data_finr,*data_boutr ; 4364f7415efSAmlan Barua ptrdiff_t local_1_start,temp; 4374f7415efSAmlan Barua switch (ndim){ 4384f7415efSAmlan Barua case 1: 439*57625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 440*57625b84SAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 441*57625b84SAmlan Barua #else 4424f7415efSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 443*57625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 444*57625b84SAmlan Barua if (fin) { 445*57625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 446*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 447*57625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 448*57625b84SAmlan Barua } 449*57625b84SAmlan Barua if (fout) { 450*57625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 451*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 452*57625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 453*57625b84SAmlan Barua } 454*57625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 455*57625b84SAmlan Barua if (bout) { 456*57625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 457*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 458*57625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 459*57625b84SAmlan Barua } 460*57625b84SAmlan Barua break; 461*57625b84SAmlan Barua 462*57625b84SAmlan Barua 463*57625b84SAmlan Barua #endif 4644f7415efSAmlan Barua case 2: 4654f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4664f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4674f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4684f7415efSAmlan Barua if (fin) { 4694f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4704f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4714f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 4724f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4734f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4744f7415efSAmlan Barua } 4754f7415efSAmlan Barua if (bout) { 4764f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 4774f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 4784f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 4794f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4804f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4814f7415efSAmlan Barua } 482*57625b84SAmlan Barua if (fout) { 483*57625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 484*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 485*57625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 486*57625b84SAmlan Barua } 4874f7415efSAmlan Barua 4884f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 4894f7415efSAmlan Barua 4904f7415efSAmlan Barua #else 4914f7415efSAmlan Barua /* Get local size */ 4920c9b18e4SAmlan Barua printf("Code works for paralllel 2d complex DFT\n"); 4934f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4944f7415efSAmlan Barua if (fin) { 4954f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4964f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4974f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4984f7415efSAmlan Barua } 4994f7415efSAmlan Barua if (fout) { 5004f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5014f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5024f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5034f7415efSAmlan Barua } 5044f7415efSAmlan Barua if (bout) { 5054f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5064f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5074f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5084f7415efSAmlan Barua } 5094f7415efSAmlan Barua 5104f7415efSAmlan Barua //printf("Hope this does not come here"); 5114f7415efSAmlan Barua #endif 5124f7415efSAmlan Barua break; 5134f7415efSAmlan Barua 5144f7415efSAmlan Barua case 3: 5154f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5164f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5174f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5184f7415efSAmlan Barua if (fin) { 5194f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5204f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5214f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5224f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5234f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5244f7415efSAmlan Barua } 5254f7415efSAmlan Barua if (bout) { 5264f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5274f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5284f7415efSAmlan Barua // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5294f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5304f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5314f7415efSAmlan Barua } 532*57625b84SAmlan Barua if (fout) { 533*57625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 535*57625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 536*57625b84SAmlan Barua } 5374f7415efSAmlan Barua 5384f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 5394f7415efSAmlan Barua 5404f7415efSAmlan Barua #else 5410c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5420c9b18e4SAmlan Barua // printf("The quantity n is %d",n); 5430c9b18e4SAmlan Barua if (fin) { 5440c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5450c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5460c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5470c9b18e4SAmlan Barua } 5480c9b18e4SAmlan Barua if (fout) { 5490c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5500c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5510c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5520c9b18e4SAmlan Barua } 5530c9b18e4SAmlan Barua if (bout) { 5540c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5550c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5560c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5570c9b18e4SAmlan Barua } 5580c9b18e4SAmlan Barua 5594f7415efSAmlan Barua #endif 5604f7415efSAmlan Barua break; 5614f7415efSAmlan Barua default: 5624f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5634f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 5644f7415efSAmlan Barua printf("The value of temp is %ld\n",temp); 5654f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 5664f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5674f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 5684f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5694f7415efSAmlan Barua 5704f7415efSAmlan Barua if (fin) { 5714f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5724f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5734f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5744f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5754f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5764f7415efSAmlan Barua } 5774f7415efSAmlan Barua if (bout) { 5784f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 5794f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5804f7415efSAmlan Barua //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 5814f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5824f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5834f7415efSAmlan Barua } 584*57625b84SAmlan Barua if (fout) { 585*57625b84SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 587*57625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 588*57625b84SAmlan Barua } 5894f7415efSAmlan Barua 5904f7415efSAmlan Barua #else 5910c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5920c9b18e4SAmlan Barua if (fin) { 5930c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5940c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5950c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5960c9b18e4SAmlan Barua } 5970c9b18e4SAmlan Barua if (fout) { 5980c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5990c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6000c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6010c9b18e4SAmlan Barua } 6020c9b18e4SAmlan Barua if (bout) { 6030c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6040c9b18e4SAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6050c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6060c9b18e4SAmlan Barua } 6074f7415efSAmlan Barua 6080c9b18e4SAmlan Barua 6090c9b18e4SAmlan Barua 6104f7415efSAmlan Barua #endif 6114f7415efSAmlan Barua break; 6124f7415efSAmlan Barua } 6134f7415efSAmlan Barua } 6144f7415efSAmlan Barua PetscFunctionReturn(0); 6154f7415efSAmlan Barua } 6164f7415efSAmlan Barua EXTERN_C_END 6174f7415efSAmlan Barua 6184f7415efSAmlan Barua 6194f7415efSAmlan Barua 620c92418ccSAmlan Barua #undef __FUNCT__ 621b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 622b2b77a04SHong Zhang /* 623b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 624b2b77a04SHong Zhang parallel layout determined by FFTW 625b2b77a04SHong Zhang 626b2b77a04SHong Zhang Collective on Mat 627b2b77a04SHong Zhang 628b2b77a04SHong Zhang Input Parameter: 629b2b77a04SHong Zhang . mat - the matrix 630b2b77a04SHong Zhang 631b2b77a04SHong Zhang Output Parameter: 632b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 633b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 634b2b77a04SHong Zhang 635b2b77a04SHong Zhang Level: advanced 636b2b77a04SHong Zhang 637b2b77a04SHong Zhang .seealso: MatCreateFFTW() 638b2b77a04SHong Zhang */ 639b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 640b2b77a04SHong Zhang { 641b2b77a04SHong Zhang PetscErrorCode ierr; 642b2b77a04SHong Zhang PetscMPIInt size,rank; 643b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 644b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 645c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6463c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 647e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 648b2b77a04SHong Zhang 649b2b77a04SHong Zhang PetscFunctionBegin; 650b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 651b2b77a04SHong Zhang PetscValidType(A,1); 652b2b77a04SHong Zhang 653b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 654b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 655b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 656e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 657b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 658b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 659e5d7f247SAmlan Barua #else 660e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 661e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 662e81bb599SAmlan Barua #endif 663b2b77a04SHong Zhang } else { /* mpi case */ 664b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6656882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 666b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 66751d1eed7SAmlan Barua double *data_finr ; 668b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 669c92418ccSAmlan Barua // PetscInt ctr; 670c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 671c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 672c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 67311902ff2SHong Zhang 674c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 675f76f76beSAmlan Barua // {k 676c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 677c92418ccSAmlan Barua // } 678b2b77a04SHong Zhang 679f76f76beSAmlan Barua 680f76f76beSAmlan Barua 681b2b77a04SHong Zhang switch (ndim){ 682b2b77a04SHong Zhang case 1: 6836882372aSHong Zhang /* Get local size */ 684e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 685c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6866882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 687*57625b84SAmlan Barua printf("The values of local_n0 and local_n1 are %d and %d",local_n0,local_n1); 6886882372aSHong Zhang if (fin) { 6896882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6906882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6916882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6926882372aSHong Zhang } 6936882372aSHong Zhang if (fout) { 6946882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 695*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6966882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6976882372aSHong Zhang } 698b2b77a04SHong Zhang break; 699b2b77a04SHong Zhang case 2: 7003c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7013c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 7023c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7033c3a9c75SAmlan Barua if (fin) { 7043c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 70554dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 7063c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 70709dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 7083c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7093c3a9c75SAmlan Barua } 710*57625b84SAmlan Barua n1 = 2*local_n1*(dim[0]); 711*57625b84SAmlan Barua //n1 = 2*local_n1*dim[1]; 712*57625b84SAmlan Barua printf("The values are %ld\n",local_n1); 7133c3a9c75SAmlan Barua if (fout) { 71409dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 71509dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7163c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7173c3a9c75SAmlan Barua } 718f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 7193c3a9c75SAmlan Barua 7203c3a9c75SAmlan Barua #else 721b2b77a04SHong Zhang /* Get local size */ 72264657d84SAmlan Barua //printf("Hope this does not come here"); 723b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 724b2b77a04SHong Zhang if (fin) { 725b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 726b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 727b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 728b2b77a04SHong Zhang } 729b2b77a04SHong Zhang if (fout) { 730b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 731b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 732b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 733b2b77a04SHong Zhang } 73464657d84SAmlan Barua //printf("Hope this does not come here"); 7353c3a9c75SAmlan Barua #endif 736b2b77a04SHong Zhang break; 737b2b77a04SHong Zhang case 3: 7386882372aSHong Zhang /* Get local size */ 7393c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 74051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 74151d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 74251d1eed7SAmlan Barua if (fin) { 74351d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 74451d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 74551d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 74651d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 74751d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 74851d1eed7SAmlan Barua } 749*57625b84SAmlan Barua printf("The value is %ld",local_n1); 750*57625b84SAmlan Barua n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 75151d1eed7SAmlan Barua if (fout) { 75251d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 75351d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 75451d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 75551d1eed7SAmlan Barua } 75651d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 75751d1eed7SAmlan Barua 75851d1eed7SAmlan Barua 7593c3a9c75SAmlan Barua #else 7606882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 76111902ff2SHong Zhang // printf("The quantity n is %d",n); 7626882372aSHong Zhang if (fin) { 7636882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7646882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7656882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7666882372aSHong Zhang } 7676882372aSHong Zhang if (fout) { 7686882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7696882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7706882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7716882372aSHong Zhang } 7723c3a9c75SAmlan Barua #endif 773b2b77a04SHong Zhang break; 774b2b77a04SHong Zhang default: 7756882372aSHong Zhang /* Get local size */ 7763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 777b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 778b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 779b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 780b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 781b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 782b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 783b3a17365SAmlan Barua if (fin) { 784b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 785b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 786b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 787b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 788b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 789b3a17365SAmlan Barua } 790*57625b84SAmlan Barua printf("The value is %ld. Global length is %d \n",local_n1,N1); 791*57625b84SAmlan Barua temp = fftw->partial_dim; 792*57625b84SAmlan Barua fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 793*57625b84SAmlan Barua 794*57625b84SAmlan Barua n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 795b3a17365SAmlan Barua if (fout) { 796b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 797*57625b84SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 798b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 799b3a17365SAmlan Barua } 800b3a17365SAmlan Barua 8013c3a9c75SAmlan Barua #else 802c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 80311902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 80411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 80511902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 80611902ff2SHong Zhang // for(i=0;i<ndim;i++) 80711902ff2SHong Zhang // { 80811902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 80911902ff2SHong Zhang // } 81011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 81111902ff2SHong Zhang // printf("The quantity n is %d",n); 8126882372aSHong Zhang if (fin) { 8136882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8146882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 8156882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 8166882372aSHong Zhang } 8176882372aSHong Zhang if (fout) { 8186882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 8196882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 8206882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 8216882372aSHong Zhang } 8223c3a9c75SAmlan Barua #endif 823b2b77a04SHong Zhang break; 824b2b77a04SHong Zhang } 825b2b77a04SHong Zhang } 82654dd5118SAmlan Barua // if (fin){ 82754dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 82854dd5118SAmlan Barua // } 82954dd5118SAmlan Barua // if (fout){ 83054dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 83154dd5118SAmlan Barua // } 832b2b77a04SHong Zhang PetscFunctionReturn(0); 833b2b77a04SHong Zhang } 834b2b77a04SHong Zhang 835b2b77a04SHong Zhang #undef __FUNCT__ 8363c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 8373c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 8383c3a9c75SAmlan Barua { 8393c3a9c75SAmlan Barua PetscErrorCode ierr; 8403c3a9c75SAmlan Barua PetscFunctionBegin; 8413c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8423c3a9c75SAmlan Barua PetscFunctionReturn(0); 8433c3a9c75SAmlan Barua } 84454efbe56SHong Zhang 845b2b77a04SHong Zhang /* 8463c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 8473c3a9c75SAmlan Barua Input A, x, y 8483c3a9c75SAmlan Barua A - FFTW matrix 84954dd5118SAmlan Barua x - user data 850b2b77a04SHong Zhang Options Database Keys: 851b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 852b2b77a04SHong Zhang 853b2b77a04SHong Zhang Level: intermediate 854b2b77a04SHong Zhang 855b2b77a04SHong Zhang */ 8563c3a9c75SAmlan Barua 8573c3a9c75SAmlan Barua EXTERN_C_BEGIN 8583c3a9c75SAmlan Barua #undef __FUNCT__ 8593c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 8603c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8613c3a9c75SAmlan Barua { 8623c3a9c75SAmlan Barua PetscErrorCode ierr; 8633c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 8643c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8653c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8663c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 867b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 868b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 869e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 8703c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 871e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 8723c3a9c75SAmlan Barua VecScatter vecscat; 8733c3a9c75SAmlan Barua IS list1,list2; 8743c3a9c75SAmlan Barua 875f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 876f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8773c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 878f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 8793c3a9c75SAmlan Barua 880e81bb599SAmlan Barua if (size==1) 881e81bb599SAmlan Barua { 8827536937bSAmlan Barua /* switch (ndim){ 883e81bb599SAmlan Barua case 1: 884e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 885e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 886e81bb599SAmlan Barua { 887e81bb599SAmlan Barua indx1[i] = i; 888e81bb599SAmlan Barua } 889e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 890e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 891e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 894b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 895b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 896e81bb599SAmlan Barua break; 897e81bb599SAmlan Barua 898e81bb599SAmlan Barua case 2: 899e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 900e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 901e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 902e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 903e81bb599SAmlan Barua } 904e81bb599SAmlan Barua } 905e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 906e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 907e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 910b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 911b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 912e81bb599SAmlan Barua break; 913e81bb599SAmlan Barua case 3: 914e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 915e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 916e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 917e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 918e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 919e81bb599SAmlan Barua } 920e81bb599SAmlan Barua } 921e81bb599SAmlan Barua } 922e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 923e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 924e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 927b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 928b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 929e81bb599SAmlan Barua break; 930e81bb599SAmlan Barua default: 9317536937bSAmlan Barua */ 9326971673cSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 9336971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9346971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9356971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9366971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 937b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 9386971673cSAmlan Barua //ierr = ISDestroy(list1);CHKERRQ(ierr); 9397536937bSAmlan Barua // break; 9407536937bSAmlan Barua // } 941e81bb599SAmlan Barua } 942e81bb599SAmlan Barua 943e81bb599SAmlan Barua else{ 944e81bb599SAmlan Barua 9453c3a9c75SAmlan Barua switch (ndim){ 9463c3a9c75SAmlan Barua case 1: 94764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 94864657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 94964657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 95064657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 95164657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 95264657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 95364657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 95464657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95564657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95664657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 95764657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 95864657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 95964657d84SAmlan Barua break; 96064657d84SAmlan Barua #else 961e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 96264657d84SAmlan Barua #endif 9633c3a9c75SAmlan Barua break; 9643c3a9c75SAmlan Barua case 2: 965bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 966bd59e6c6SAmlan Barua //PetscInt my_value; 967bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 968bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 969bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 970bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 971bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 972bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 973bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 976bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 977bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 978bd59e6c6SAmlan Barua break; 979bd59e6c6SAmlan Barua #else 9803c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9813c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9823c3a9c75SAmlan Barua 9835b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9845b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9855b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 9863c3a9c75SAmlan Barua 9873c3a9c75SAmlan Barua if (dim[1]%2==0) 9883c3a9c75SAmlan Barua NM = dim[1]+2; 9893c3a9c75SAmlan Barua else 9903c3a9c75SAmlan Barua NM = dim[1]+1; 9913c3a9c75SAmlan Barua 9923c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9933c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9943c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9953c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9965b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9973c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9985b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 9995b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 10005b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 10015b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 10023c3a9c75SAmlan Barua } 10033c3a9c75SAmlan Barua } 10043c3a9c75SAmlan Barua 10053c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10063c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10073c3a9c75SAmlan Barua 1008f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1009f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1012b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1013b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1014b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1015b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10163c3a9c75SAmlan Barua break; 1017bd59e6c6SAmlan Barua #endif 10183c3a9c75SAmlan Barua 10193c3a9c75SAmlan Barua case 3: 1020bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1021bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1022bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1023bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1024bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1025bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1026bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1031bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1032bd59e6c6SAmlan Barua break; 1033bd59e6c6SAmlan Barua #else 103451d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103551d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 103651d1eed7SAmlan Barua 103751d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 103851d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 103951d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 104051d1eed7SAmlan Barua 104151d1eed7SAmlan Barua if (dim[2]%2==0) 104251d1eed7SAmlan Barua NM = dim[2]+2; 104351d1eed7SAmlan Barua else 104451d1eed7SAmlan Barua NM = dim[2]+1; 104551d1eed7SAmlan Barua 104651d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 104751d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 104851d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 104951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 105051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 105151d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 105251d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 105351d1eed7SAmlan Barua } 105451d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 105551d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 105651d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 105751d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 105851d1eed7SAmlan Barua } 105951d1eed7SAmlan Barua } 106051d1eed7SAmlan Barua 106151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 106251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 106351d1eed7SAmlan Barua 106451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 106551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1068b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1069b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1070b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1071b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10723c3a9c75SAmlan Barua break; 1073bd59e6c6SAmlan Barua #endif 10743c3a9c75SAmlan Barua 10753c3a9c75SAmlan Barua default: 1076bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1077bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1078bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1079bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1080bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1081bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1082bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1083bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1086bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1087bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1088bd59e6c6SAmlan Barua 1089bd59e6c6SAmlan Barua 1090bd59e6c6SAmlan Barua #else 1091e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1092e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 1093e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1094e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1095e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1096e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1097e5d7f247SAmlan Barua 1098e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1099e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1100e5d7f247SAmlan Barua 1101e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1102e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1103e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1104e5d7f247SAmlan Barua 1105e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1106ba6e06d0SAmlan Barua NM = 2; 1107e5d7f247SAmlan Barua else 1108ba6e06d0SAmlan Barua NM = 1; 1109e5d7f247SAmlan Barua 11106971673cSAmlan Barua j = low; 11116971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 11126971673cSAmlan Barua { 11136971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 11146971673cSAmlan Barua indx2[i] = j; 1115ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 11166971673cSAmlan Barua if (k%dim[ndim-1]==0) 11176971673cSAmlan Barua { j+=NM;} 11186971673cSAmlan Barua j++; 11196971673cSAmlan Barua } 11206971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 11216971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 11226971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 11236971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11246971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11256971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1126b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1127b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1128b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1129b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 11303c3a9c75SAmlan Barua break; 1131bd59e6c6SAmlan Barua #endif 11323c3a9c75SAmlan Barua } 1133e81bb599SAmlan Barua } 11343c3a9c75SAmlan Barua 11353c3a9c75SAmlan Barua return 0; 11363c3a9c75SAmlan Barua } 11373c3a9c75SAmlan Barua EXTERN_C_END 11383c3a9c75SAmlan Barua 11393c3a9c75SAmlan Barua #undef __FUNCT__ 11403c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 11413c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 11423c3a9c75SAmlan Barua { 11433c3a9c75SAmlan Barua PetscErrorCode ierr; 11443c3a9c75SAmlan Barua PetscFunctionBegin; 11453c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 11463c3a9c75SAmlan Barua PetscFunctionReturn(0); 11473c3a9c75SAmlan Barua } 114854efbe56SHong Zhang 11495b3e41ffSAmlan Barua /* 11505b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 11515b3e41ffSAmlan Barua Input A, x, y 11525b3e41ffSAmlan Barua A - FFTW matrix 11535b3e41ffSAmlan Barua x - FFTW vector 11545b3e41ffSAmlan Barua y - PETSc vector that user can read 11555b3e41ffSAmlan Barua Options Database Keys: 11565b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11575b3e41ffSAmlan Barua 11585b3e41ffSAmlan Barua Level: intermediate 11595b3e41ffSAmlan Barua 11603c3a9c75SAmlan Barua */ 11613c3a9c75SAmlan Barua 11623c3a9c75SAmlan Barua EXTERN_C_BEGIN 11633c3a9c75SAmlan Barua #undef __FUNCT__ 11645b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 11655b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 11665b3e41ffSAmlan Barua { 11675b3e41ffSAmlan Barua PetscErrorCode ierr; 11685b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 11695b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11705b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 11715b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 1172b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1173b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 1174ba6e06d0SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 11755b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1176ba6e06d0SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 11775b3e41ffSAmlan Barua VecScatter vecscat; 11785b3e41ffSAmlan Barua IS list1,list2; 11795b3e41ffSAmlan Barua 11805b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11815b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1182cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 11835b3e41ffSAmlan Barua 1184e81bb599SAmlan Barua if (size==1){ 11857536937bSAmlan Barua /* 11865b3e41ffSAmlan Barua switch (ndim){ 11875b3e41ffSAmlan Barua case 1: 1188e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1189e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1190e81bb599SAmlan Barua { 1191e81bb599SAmlan Barua indx1[i] = i; 1192e81bb599SAmlan Barua } 1193e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1194e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1195e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1196e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1197e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1198b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1199b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1200e81bb599SAmlan Barua break; 1201e81bb599SAmlan Barua 1202e81bb599SAmlan Barua case 2: 1203e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1204e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1205e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1206e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1207e81bb599SAmlan Barua } 1208e81bb599SAmlan Barua } 1209e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1210e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1211e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1214b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1215b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1216e81bb599SAmlan Barua break; 1217e81bb599SAmlan Barua case 3: 1218e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1219e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1220e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1221e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1222e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1223e81bb599SAmlan Barua } 1224e81bb599SAmlan Barua } 1225e81bb599SAmlan Barua } 1226e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1227e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1228e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1231b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1232b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1233e81bb599SAmlan Barua break; 1234e81bb599SAmlan Barua default: 12357536937bSAmlan Barua */ 12366971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 12376971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 12386971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 12396971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12406971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12416971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1242b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 12437536937bSAmlan Barua // break; 12447536937bSAmlan Barua // } 1245e81bb599SAmlan Barua } 1246e81bb599SAmlan Barua else{ 1247e81bb599SAmlan Barua 1248e81bb599SAmlan Barua switch (ndim){ 1249e81bb599SAmlan Barua case 1: 125064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 125164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 125264657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 125364657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 125464657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 125564657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 125664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 125764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 125964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 126064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 126164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 126264657d84SAmlan Barua break; 126364657d84SAmlan Barua 126464657d84SAmlan Barua #else 12656a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 126664657d84SAmlan Barua #endif 12675b3e41ffSAmlan Barua break; 12685b3e41ffSAmlan Barua case 2: 1269bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1270bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1271bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1272bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1273bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1274bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1276bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1277bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1278bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1279bd59e6c6SAmlan Barua break; 1280bd59e6c6SAmlan Barua #else 12815b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12825b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12835b3e41ffSAmlan Barua 12845b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12855b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12865b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 12875b3e41ffSAmlan Barua 12885b3e41ffSAmlan Barua if (dim[1]%2==0) 12895b3e41ffSAmlan Barua NM = dim[1]+2; 12905b3e41ffSAmlan Barua else 12915b3e41ffSAmlan Barua NM = dim[1]+1; 12925b3e41ffSAmlan Barua 12935b3e41ffSAmlan Barua 12945b3e41ffSAmlan Barua 12955b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12965b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12975b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12985b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12995b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 13005b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1301cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1302cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1303cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1304cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 13055b3e41ffSAmlan Barua } 13065b3e41ffSAmlan Barua } 13075b3e41ffSAmlan Barua 13085b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 13095b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 13105b3e41ffSAmlan Barua 13115b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 13125b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 13135b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 13145b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1315b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1316b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1317b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1318b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 13195b3e41ffSAmlan Barua break; 1320bd59e6c6SAmlan Barua #endif 13215b3e41ffSAmlan Barua case 3: 1322bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1323bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1324bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1325bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1326bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1327bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1328bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1329bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1331bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1332bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1333bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1334bd59e6c6SAmlan Barua break; 1335bd59e6c6SAmlan Barua #else 1336bd59e6c6SAmlan Barua 133751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 133851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 133951d1eed7SAmlan Barua 134051d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 134151d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 134251d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 134351d1eed7SAmlan Barua 134451d1eed7SAmlan Barua if (dim[2]%2==0) 134551d1eed7SAmlan Barua NM = dim[2]+2; 134651d1eed7SAmlan Barua else 134751d1eed7SAmlan Barua NM = dim[2]+1; 134851d1eed7SAmlan Barua 134951d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 135051d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 135151d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 135251d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 135351d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 135451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 135551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 135651d1eed7SAmlan Barua } 135751d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 135851d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 135951d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 136051d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 136151d1eed7SAmlan Barua } 136251d1eed7SAmlan Barua } 136351d1eed7SAmlan Barua 136451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 136551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 136651d1eed7SAmlan Barua 136751d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 136851d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 136951d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 137051d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1371b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1372b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1373b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1374b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 13755b3e41ffSAmlan Barua break; 1376bd59e6c6SAmlan Barua #endif 13775b3e41ffSAmlan Barua default: 1378bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1379bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1380bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1381bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1382bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1383bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1384bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1385bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1386bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1387bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1388bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1389bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1390bd59e6c6SAmlan Barua #else 1391ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1392ba6e06d0SAmlan Barua printf("The value of temp is %ld\n",temp); 1393ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1394ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1395ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1396ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1397ba6e06d0SAmlan Barua 1398ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1399ba6e06d0SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1400ba6e06d0SAmlan Barua 1401ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1402ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1403ba6e06d0SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1404ba6e06d0SAmlan Barua 1405ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1406ba6e06d0SAmlan Barua NM = 2; 1407ba6e06d0SAmlan Barua else 1408ba6e06d0SAmlan Barua NM = 1; 1409ba6e06d0SAmlan Barua 1410ba6e06d0SAmlan Barua j = low; 1411ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1412ba6e06d0SAmlan Barua { 1413ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1414ba6e06d0SAmlan Barua indx2[i] = j; 1415ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1416ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1417ba6e06d0SAmlan Barua { j+=NM;} 1418ba6e06d0SAmlan Barua j++; 1419ba6e06d0SAmlan Barua } 1420ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1421ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1422ba6e06d0SAmlan Barua 1423ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1424ba6e06d0SAmlan Barua 1425ba6e06d0SAmlan Barua 1426ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1427ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1428ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1429ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1430b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1431b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1432b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1433b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1434ba6e06d0SAmlan Barua 14355b3e41ffSAmlan Barua break; 1436bd59e6c6SAmlan Barua #endif 14375b3e41ffSAmlan Barua } 1438e81bb599SAmlan Barua } 14395b3e41ffSAmlan Barua return 0; 14405b3e41ffSAmlan Barua } 14415b3e41ffSAmlan Barua EXTERN_C_END 14425b3e41ffSAmlan Barua 14435b3e41ffSAmlan Barua EXTERN_C_BEGIN 14445b3e41ffSAmlan Barua #undef __FUNCT__ 14453c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 14463c3a9c75SAmlan Barua /* 14473c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 14483c3a9c75SAmlan Barua via the external package FFTW 14493c3a9c75SAmlan Barua Options Database Keys: 14503c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 14513c3a9c75SAmlan Barua 14523c3a9c75SAmlan Barua Level: intermediate 14533c3a9c75SAmlan Barua 14543c3a9c75SAmlan Barua */ 14553c3a9c75SAmlan Barua 1456b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1457b2b77a04SHong Zhang { 1458b2b77a04SHong Zhang PetscErrorCode ierr; 1459b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1460b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1461b2b77a04SHong Zhang Mat_FFTW *fftw; 1462b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1463b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1464b2b77a04SHong Zhang PetscBool flg; 1465b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 146611902ff2SHong Zhang PetscMPIInt size,rank; 1467b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 14684a16a297SSean Farley ptrdiff_t local_n1,local_1_start,local_1_end; 1469b2b77a04SHong Zhang 1470b2b77a04SHong Zhang PetscFunctionBegin; 1471b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 147211902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 147354dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1474c92418ccSAmlan Barua 147511902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 147611902ff2SHong Zhang pdim[0] = dim[0]; 147711902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 147811902ff2SHong Zhang { 14796882372aSHong Zhang partial_dim *= dim[ctr]; 148011902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14816882372aSHong Zhang } 14823c3a9c75SAmlan Barua 1483b2b77a04SHong Zhang if (size == 1) { 1484e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1485b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1486b2b77a04SHong Zhang n = N; 1487e81bb599SAmlan Barua #else 1488e81bb599SAmlan Barua int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1489e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1490e81bb599SAmlan Barua n = tot_dim; 1491e81bb599SAmlan Barua #endif 1492e81bb599SAmlan Barua 1493b2b77a04SHong Zhang } else { 1494b223cc97SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1495b2b77a04SHong Zhang switch (ndim){ 1496b2b77a04SHong Zhang case 1: 14973c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14983c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1499e5d7f247SAmlan Barua #else 15006882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 15016882372aSHong Zhang n = (PetscInt)local_n0; 1502798b254bSAmlan Barua printf("The value of n is %d",n); 15036882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1504e5d7f247SAmlan Barua #endif 15053c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1506b2b77a04SHong Zhang break; 1507b2b77a04SHong Zhang case 2: 15085b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1509b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1510b2b77a04SHong Zhang /* 1511b2b77a04SHong Zhang PetscMPIInt rank; 1512b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1513b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1514b2b77a04SHong Zhang */ 1515b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1516b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 15175b3e41ffSAmlan Barua #else 15185b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 15195b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 15205b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 15215b3e41ffSAmlan Barua #endif 1522b2b77a04SHong Zhang break; 1523b2b77a04SHong Zhang case 3: 152411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 152551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 152651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 15276882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 15286882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 152951d1eed7SAmlan Barua #else 153051d1eed7SAmlan Barua printf("The code comes here\n"); 153151d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 153251d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 153351d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 153451d1eed7SAmlan Barua #endif 1535b2b77a04SHong Zhang break; 1536b2b77a04SHong Zhang default: 1537b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 153811902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1539c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 154011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 15416882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 154211902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 15436882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1544b3a17365SAmlan Barua #else 1545b3a17365SAmlan Barua temp = pdim[ndim-1]; 1546b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 1547b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1548b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1549b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1550b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1551b3a17365SAmlan Barua pdim[ndim-1] = temp; 1552b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1553b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1554b3a17365SAmlan Barua #endif 1555b2b77a04SHong Zhang break; 1556b2b77a04SHong Zhang } 1557b2b77a04SHong Zhang } 1558b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1559b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1560b2b77a04SHong Zhang fft->data = (void*)fftw; 1561b2b77a04SHong Zhang 1562b2b77a04SHong Zhang fft->n = n; 1563c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1564e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1565c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1566b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1567c92418ccSAmlan Barua 1568b2b77a04SHong Zhang fftw->p_forward = 0; 1569b2b77a04SHong Zhang fftw->p_backward = 0; 1570b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1571b2b77a04SHong Zhang 1572b2b77a04SHong Zhang if (size == 1){ 1573b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1574b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1575b2b77a04SHong Zhang } else { 1576b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1577b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1578b2b77a04SHong Zhang } 1579b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15806a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1581b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 1582b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 15834f7415efSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW); 15843c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15855b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1586b2b77a04SHong Zhang 1587b2b77a04SHong Zhang /* get runtime options */ 1588b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1589b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1590b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1591b2b77a04SHong Zhang PetscOptionsEnd(); 1592b2b77a04SHong Zhang PetscFunctionReturn(0); 1593b2b77a04SHong Zhang } 1594b2b77a04SHong Zhang EXTERN_C_END 15953c3a9c75SAmlan Barua 15963c3a9c75SAmlan Barua 15973c3a9c75SAmlan Barua 15983c3a9c75SAmlan Barua 1599