1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28*4f7415efSAmlan Barua extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*); 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang #undef __FUNCT__ 31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33b2b77a04SHong Zhang { 34b2b77a04SHong Zhang PetscErrorCode ierr; 35b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 36b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 38b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 39b2b77a04SHong Zhang 40b2b77a04SHong Zhang PetscFunctionBegin; 41b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 44b2b77a04SHong Zhang switch (ndim){ 45b2b77a04SHong Zhang case 1: 4658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 47b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 4858a912c5SAmlan Barua #else 4958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5058a912c5SAmlan Barua #endif 51b2b77a04SHong Zhang break; 52b2b77a04SHong Zhang case 2: 5358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5558a912c5SAmlan Barua #else 5658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5758a912c5SAmlan Barua #endif 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang case 3: 6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 61b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6258a912c5SAmlan Barua #else 6358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6458a912c5SAmlan Barua #endif 65b2b77a04SHong Zhang break; 66b2b77a04SHong Zhang default: 6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 68b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6958a912c5SAmlan Barua #else 7058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7158a912c5SAmlan Barua #endif 72b2b77a04SHong Zhang break; 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang fftw->finarray = x_array; 75b2b77a04SHong Zhang fftw->foutarray = y_array; 76b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 77b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 78b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 79b2b77a04SHong Zhang } else { /* use existing plan */ 80b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 81b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 82b2b77a04SHong Zhang } else { 83b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 84b2b77a04SHong Zhang } 85b2b77a04SHong Zhang } 86b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 87b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 88b2b77a04SHong Zhang PetscFunctionReturn(0); 89b2b77a04SHong Zhang } 90b2b77a04SHong Zhang 91b2b77a04SHong Zhang #undef __FUNCT__ 92b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 93b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 94b2b77a04SHong Zhang { 95b2b77a04SHong Zhang PetscErrorCode ierr; 96b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 97b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 98b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 99b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 100b2b77a04SHong Zhang 101b2b77a04SHong Zhang PetscFunctionBegin; 102b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 103b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 104b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 105b2b77a04SHong Zhang switch (ndim){ 106b2b77a04SHong Zhang case 1: 10758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 108b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 10958a912c5SAmlan Barua #else 110e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11158a912c5SAmlan Barua #endif 112b2b77a04SHong Zhang break; 113b2b77a04SHong Zhang case 2: 11458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 115b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11658a912c5SAmlan Barua #else 117e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11858a912c5SAmlan Barua #endif 119b2b77a04SHong Zhang break; 120b2b77a04SHong Zhang case 3: 12158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 122b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12358a912c5SAmlan Barua #else 124e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12558a912c5SAmlan Barua #endif 126b2b77a04SHong Zhang break; 127b2b77a04SHong Zhang default: 12858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 129b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13058a912c5SAmlan Barua #else 131e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13258a912c5SAmlan Barua #endif 133b2b77a04SHong Zhang break; 134b2b77a04SHong Zhang } 135b2b77a04SHong Zhang fftw->binarray = x_array; 136b2b77a04SHong Zhang fftw->boutarray = y_array; 137b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 138b2b77a04SHong Zhang } else { /* use existing plan */ 139b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 140b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 141b2b77a04SHong Zhang } else { 142b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 143b2b77a04SHong Zhang } 144b2b77a04SHong Zhang } 145b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 146b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 147b2b77a04SHong Zhang PetscFunctionReturn(0); 148b2b77a04SHong Zhang } 149b2b77a04SHong Zhang 150b2b77a04SHong Zhang #undef __FUNCT__ 151b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 152b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 153b2b77a04SHong Zhang { 154b2b77a04SHong Zhang PetscErrorCode ierr; 155b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 156b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 158c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 159b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 160b2b77a04SHong Zhang 161b2b77a04SHong Zhang PetscFunctionBegin; 162b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 165b2b77a04SHong Zhang switch (ndim){ 166b2b77a04SHong Zhang case 1: 1673c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 168b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169ae0a50aaSHong Zhang #else 170ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 1713c3a9c75SAmlan Barua #endif 172b2b77a04SHong Zhang break; 173b2b77a04SHong Zhang case 2: 1743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 175b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1763c3a9c75SAmlan Barua #else 1773c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1783c3a9c75SAmlan Barua #endif 179b2b77a04SHong Zhang break; 180b2b77a04SHong Zhang case 3: 1813c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 182b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1833c3a9c75SAmlan Barua #else 18451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1853c3a9c75SAmlan Barua #endif 186b2b77a04SHong Zhang break; 187b2b77a04SHong Zhang default: 1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 189c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1903c3a9c75SAmlan Barua #else 1913c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1923c3a9c75SAmlan Barua #endif 193b2b77a04SHong Zhang break; 194b2b77a04SHong Zhang } 195b2b77a04SHong Zhang fftw->finarray = x_array; 196b2b77a04SHong Zhang fftw->foutarray = y_array; 197b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 198b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 199b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 200b2b77a04SHong Zhang } else { /* use existing plan */ 201b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 202b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 203b2b77a04SHong Zhang } else { 204b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang } 207b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 209b2b77a04SHong Zhang PetscFunctionReturn(0); 210b2b77a04SHong Zhang } 211b2b77a04SHong Zhang 212b2b77a04SHong Zhang #undef __FUNCT__ 213b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 214b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 215b2b77a04SHong Zhang { 216b2b77a04SHong Zhang PetscErrorCode ierr; 217b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 218b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 219b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 220c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 221b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 222b2b77a04SHong Zhang 223b2b77a04SHong Zhang PetscFunctionBegin; 224b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 225b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 226b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 227b2b77a04SHong Zhang switch (ndim){ 228b2b77a04SHong Zhang case 1: 2293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 230b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 231ae0a50aaSHong Zhang #else 232ae0a50aaSHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 2333c3a9c75SAmlan Barua #endif 234b2b77a04SHong Zhang break; 235b2b77a04SHong Zhang case 2: 2363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 237b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2383c3a9c75SAmlan Barua #else 2393c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2403c3a9c75SAmlan Barua #endif 241b2b77a04SHong Zhang break; 242b2b77a04SHong Zhang case 3: 2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 244b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2453c3a9c75SAmlan Barua #else 2463c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2473c3a9c75SAmlan Barua #endif 248b2b77a04SHong Zhang break; 249b2b77a04SHong Zhang default: 2503c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 251c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2523c3a9c75SAmlan Barua #else 2533c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2543c3a9c75SAmlan Barua #endif 255b2b77a04SHong Zhang break; 256b2b77a04SHong Zhang } 257b2b77a04SHong Zhang fftw->binarray = x_array; 258b2b77a04SHong Zhang fftw->boutarray = y_array; 259b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 260b2b77a04SHong Zhang } else { /* use existing plan */ 261b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 262b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 263b2b77a04SHong Zhang } else { 264b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265b2b77a04SHong Zhang } 266b2b77a04SHong Zhang } 267b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 268b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 269b2b77a04SHong Zhang PetscFunctionReturn(0); 270b2b77a04SHong Zhang } 271b2b77a04SHong Zhang 272b2b77a04SHong Zhang #undef __FUNCT__ 273b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 274b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 275b2b77a04SHong Zhang { 276b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 277b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 278b2b77a04SHong Zhang PetscErrorCode ierr; 279b2b77a04SHong Zhang 280b2b77a04SHong Zhang PetscFunctionBegin; 281b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 282b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 283b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 284bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 285b2b77a04SHong Zhang PetscFunctionReturn(0); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang 288c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 289b2b77a04SHong Zhang #undef __FUNCT__ 290b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 291b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 292b2b77a04SHong Zhang { 293b2b77a04SHong Zhang PetscErrorCode ierr; 294b2b77a04SHong Zhang PetscScalar *array; 295b2b77a04SHong Zhang 296b2b77a04SHong Zhang PetscFunctionBegin; 297b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 298b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 300b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 301b2b77a04SHong Zhang PetscFunctionReturn(0); 302b2b77a04SHong Zhang } 303b2b77a04SHong Zhang 304b2b77a04SHong Zhang #undef __FUNCT__ 3053c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 306c92418ccSAmlan Barua /* 307c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 308c92418ccSAmlan Barua parallel layout determined by FFTW-1D 309c92418ccSAmlan Barua 310c92418ccSAmlan Barua */ 3116a506ed8SAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 312c92418ccSAmlan Barua { 313c92418ccSAmlan Barua PetscErrorCode ierr; 314c92418ccSAmlan Barua PetscMPIInt size,rank; 315c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 316c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 317c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318c92418ccSAmlan Barua PetscInt N=fft->N; 319c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 320c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 321c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 322c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 323c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 324ae0a50aaSHong Zhang fftw_complex *data_fin,*data_fout,*data_bout; 325c92418ccSAmlan Barua 326c92418ccSAmlan Barua PetscFunctionBegin; 327c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 328c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 329c92418ccSAmlan Barua #endif 330c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 331c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 332c92418ccSAmlan Barua if (size == 1){ 333c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 334c92418ccSAmlan Barua } 335c92418ccSAmlan Barua else { 336c92418ccSAmlan Barua if (ndim>1){ 337c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 338c92418ccSAmlan Barua else { 339c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 340c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 341c92418ccSAmlan Barua if (fin) { 342c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 343c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 344c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 345c92418ccSAmlan Barua } 346c92418ccSAmlan Barua if (fout) { 347c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 348c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 349c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 350c92418ccSAmlan Barua } 351c92418ccSAmlan Barua if (bout) { 352c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 353c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 354c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua } 357c92418ccSAmlan Barua if (fin){ 358c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 359c92418ccSAmlan Barua } 360c92418ccSAmlan Barua if (fout){ 361c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 362c92418ccSAmlan Barua } 363c92418ccSAmlan Barua if (bout){ 364c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 365c92418ccSAmlan Barua } 366c92418ccSAmlan Barua PetscFunctionReturn(0); 367c92418ccSAmlan Barua } 368c92418ccSAmlan Barua 369c92418ccSAmlan Barua 370c92418ccSAmlan Barua } 3713c3a9c75SAmlan Barua 372*4f7415efSAmlan Barua 373*4f7415efSAmlan Barua #undef __FUNCT__ 374*4f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT" 375*4f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z) 376*4f7415efSAmlan Barua { 377*4f7415efSAmlan Barua PetscErrorCode ierr; 378*4f7415efSAmlan Barua PetscFunctionBegin; 379*4f7415efSAmlan Barua ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 380*4f7415efSAmlan Barua PetscFunctionReturn(0); 381*4f7415efSAmlan Barua } 382*4f7415efSAmlan Barua 383*4f7415efSAmlan Barua #undef __FUNCT__ 384*4f7415efSAmlan Barua #define __FUNCT__ "MatGetVecsFFT_FFTW" 385*4f7415efSAmlan Barua /* 386*4f7415efSAmlan Barua MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 387*4f7415efSAmlan Barua parallel layout determined by FFTW 388*4f7415efSAmlan Barua 389*4f7415efSAmlan Barua Collective on Mat 390*4f7415efSAmlan Barua 391*4f7415efSAmlan Barua Input Parameter: 392*4f7415efSAmlan Barua . mat - the matrix 393*4f7415efSAmlan Barua 394*4f7415efSAmlan Barua Output Parameter: 395*4f7415efSAmlan Barua + fin - (optional) input vector of forward FFTW 396*4f7415efSAmlan Barua - fout - (optional) output vector of forward FFTW 397*4f7415efSAmlan Barua 398*4f7415efSAmlan Barua Level: advanced 399*4f7415efSAmlan Barua 400*4f7415efSAmlan Barua .seealso: MatCreateFFTW() 401*4f7415efSAmlan Barua */ 402*4f7415efSAmlan Barua EXTERN_C_BEGIN 403*4f7415efSAmlan Barua PetscErrorCode MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 404*4f7415efSAmlan Barua { 405*4f7415efSAmlan Barua PetscErrorCode ierr; 406*4f7415efSAmlan Barua PetscMPIInt size,rank; 407*4f7415efSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 408*4f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 409*4f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 410*4f7415efSAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 411*4f7415efSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 412*4f7415efSAmlan Barua 413*4f7415efSAmlan Barua PetscFunctionBegin; 414*4f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 415*4f7415efSAmlan Barua PetscValidType(A,1); 416*4f7415efSAmlan Barua 417*4f7415efSAmlan Barua ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 418*4f7415efSAmlan Barua ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 419*4f7415efSAmlan Barua printf("size is %d\n",size); 420*4f7415efSAmlan Barua if (size == 1){ /* sequential case */ 421*4f7415efSAmlan Barua printf("Routine is getting called\n"); 422*4f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 423*4f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 424*4f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 425*4f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 426*4f7415efSAmlan Barua #else 427*4f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 428*4f7415efSAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 429*4f7415efSAmlan Barua if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 430*4f7415efSAmlan Barua #endif 431*4f7415efSAmlan Barua } else { 432*4f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 433*4f7415efSAmlan Barua ptrdiff_t local_n1,local_1_end; 434*4f7415efSAmlan Barua fftw_complex *data_fin,*data_fout,data_bout; 435*4f7415efSAmlan Barua double *data_finr,*data_boutr ; 436*4f7415efSAmlan Barua ptrdiff_t local_1_start,temp; 437*4f7415efSAmlan Barua switch (ndim){ 438*4f7415efSAmlan Barua case 1: 439*4f7415efSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 440*4f7415efSAmlan Barua case 2: 441*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 442*4f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 443*4f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 444*4f7415efSAmlan Barua if (fin) { 445*4f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 446*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 447*4f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 448*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 449*4f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 450*4f7415efSAmlan Barua } 451*4f7415efSAmlan Barua if (fout) { 452*4f7415efSAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 453*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 454*4f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 455*4f7415efSAmlan Barua } 456*4f7415efSAmlan Barua if (bout) { 457*4f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459*4f7415efSAmlan Barua ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 461*4f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 462*4f7415efSAmlan Barua } 463*4f7415efSAmlan Barua 464*4f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 465*4f7415efSAmlan Barua 466*4f7415efSAmlan Barua #else 467*4f7415efSAmlan Barua /* Get local size */ 468*4f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 469*4f7415efSAmlan Barua if (fin) { 470*4f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 471*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 472*4f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 473*4f7415efSAmlan Barua } 474*4f7415efSAmlan Barua if (fout) { 475*4f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 477*4f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 478*4f7415efSAmlan Barua } 479*4f7415efSAmlan Barua if (bout) { 480*4f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 481*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 482*4f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 483*4f7415efSAmlan Barua } 484*4f7415efSAmlan Barua 485*4f7415efSAmlan Barua //printf("Hope this does not come here"); 486*4f7415efSAmlan Barua #endif 487*4f7415efSAmlan Barua break; 488*4f7415efSAmlan Barua 489*4f7415efSAmlan Barua case 3: 490*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 491*4f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 492*4f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 493*4f7415efSAmlan Barua if (fin) { 494*4f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 495*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 496*4f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 497*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 498*4f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 499*4f7415efSAmlan Barua } 500*4f7415efSAmlan Barua if (fout) { 501*4f7415efSAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 502*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 503*4f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 504*4f7415efSAmlan Barua } 505*4f7415efSAmlan Barua if (bout) { 506*4f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 507*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 508*4f7415efSAmlan Barua // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 509*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 510*4f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 511*4f7415efSAmlan Barua } 512*4f7415efSAmlan Barua 513*4f7415efSAmlan Barua //printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 514*4f7415efSAmlan Barua 515*4f7415efSAmlan Barua #else 516*4f7415efSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 517*4f7415efSAmlan Barua #endif 518*4f7415efSAmlan Barua break; 519*4f7415efSAmlan Barua default: 520*4f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 521*4f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 522*4f7415efSAmlan Barua printf("The value of temp is %ld\n",temp); 523*4f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 524*4f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 525*4f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 526*4f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 527*4f7415efSAmlan Barua 528*4f7415efSAmlan Barua if (fin) { 529*4f7415efSAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 530*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 531*4f7415efSAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 532*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 533*4f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534*4f7415efSAmlan Barua } 535*4f7415efSAmlan Barua if (fout) { 536*4f7415efSAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 538*4f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539*4f7415efSAmlan Barua } 540*4f7415efSAmlan Barua if (bout) { 541*4f7415efSAmlan Barua data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542*4f7415efSAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543*4f7415efSAmlan Barua //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 544*4f7415efSAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 545*4f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 546*4f7415efSAmlan Barua } 547*4f7415efSAmlan Barua 548*4f7415efSAmlan Barua #else 549*4f7415efSAmlan Barua 550*4f7415efSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 551*4f7415efSAmlan Barua #endif 552*4f7415efSAmlan Barua break; 553*4f7415efSAmlan Barua } 554*4f7415efSAmlan Barua } 555*4f7415efSAmlan Barua PetscFunctionReturn(0); 556*4f7415efSAmlan Barua } 557*4f7415efSAmlan Barua EXTERN_C_END 558*4f7415efSAmlan Barua 559*4f7415efSAmlan Barua 560*4f7415efSAmlan Barua 561c92418ccSAmlan Barua #undef __FUNCT__ 562b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 563b2b77a04SHong Zhang /* 564b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 565b2b77a04SHong Zhang parallel layout determined by FFTW 566b2b77a04SHong Zhang 567b2b77a04SHong Zhang Collective on Mat 568b2b77a04SHong Zhang 569b2b77a04SHong Zhang Input Parameter: 570b2b77a04SHong Zhang . mat - the matrix 571b2b77a04SHong Zhang 572b2b77a04SHong Zhang Output Parameter: 573b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 574b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 575b2b77a04SHong Zhang 576b2b77a04SHong Zhang Level: advanced 577b2b77a04SHong Zhang 578b2b77a04SHong Zhang .seealso: MatCreateFFTW() 579b2b77a04SHong Zhang */ 580b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 581b2b77a04SHong Zhang { 582b2b77a04SHong Zhang PetscErrorCode ierr; 583b2b77a04SHong Zhang PetscMPIInt size,rank; 584b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 585b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 586c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 5873c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 588e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 589b2b77a04SHong Zhang 590b2b77a04SHong Zhang PetscFunctionBegin; 591b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 592b2b77a04SHong Zhang PetscValidType(A,1); 593b2b77a04SHong Zhang 594b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 595b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 596b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 597e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 598b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 599b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 600e5d7f247SAmlan Barua #else 601e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 602e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 603e81bb599SAmlan Barua #endif 604b2b77a04SHong Zhang } else { /* mpi case */ 605b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 6066882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 607b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 60851d1eed7SAmlan Barua double *data_finr ; 609b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 610c92418ccSAmlan Barua // PetscInt ctr; 611c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 612c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 613c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 61411902ff2SHong Zhang 615c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 616f76f76beSAmlan Barua // {k 617c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 618c92418ccSAmlan Barua // } 619b2b77a04SHong Zhang 620f76f76beSAmlan Barua 621f76f76beSAmlan Barua 622b2b77a04SHong Zhang switch (ndim){ 623b2b77a04SHong Zhang case 1: 6246882372aSHong Zhang /* Get local size */ 625e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 626c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 6276882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 6286882372aSHong Zhang if (fin) { 6296882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6306882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6316882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6326882372aSHong Zhang } 6336882372aSHong Zhang if (fout) { 6346882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6356882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6366882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6376882372aSHong Zhang } 638b2b77a04SHong Zhang break; 639b2b77a04SHong Zhang case 2: 6403c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6413c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6423c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6433c3a9c75SAmlan Barua if (fin) { 6443c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 64554dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6463c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 64709dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 6483c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6493c3a9c75SAmlan Barua } 6503c3a9c75SAmlan Barua if (fout) { 65109dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 65209dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6533c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6543c3a9c75SAmlan Barua } 655f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 6563c3a9c75SAmlan Barua 6573c3a9c75SAmlan Barua #else 658b2b77a04SHong Zhang /* Get local size */ 65964657d84SAmlan Barua //printf("Hope this does not come here"); 660b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 661b2b77a04SHong Zhang if (fin) { 662b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 663b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 664b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 665b2b77a04SHong Zhang } 666b2b77a04SHong Zhang if (fout) { 667b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 668b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 669b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 670b2b77a04SHong Zhang } 67164657d84SAmlan Barua //printf("Hope this does not come here"); 6723c3a9c75SAmlan Barua #endif 673b2b77a04SHong Zhang break; 674b2b77a04SHong Zhang case 3: 6756882372aSHong Zhang /* Get local size */ 6763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 67751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 67851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 67951d1eed7SAmlan Barua if (fin) { 68051d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 68151d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 68251d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 68351d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 68451d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 68551d1eed7SAmlan Barua } 68651d1eed7SAmlan Barua if (fout) { 68751d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 68851d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 68951d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 69051d1eed7SAmlan Barua } 69151d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 69251d1eed7SAmlan Barua 69351d1eed7SAmlan Barua 6943c3a9c75SAmlan Barua #else 6956882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 69611902ff2SHong Zhang // printf("The quantity n is %d",n); 6976882372aSHong Zhang if (fin) { 6986882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6996882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7006882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7016882372aSHong Zhang } 7026882372aSHong Zhang if (fout) { 7036882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7046882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7056882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7066882372aSHong Zhang } 7073c3a9c75SAmlan Barua #endif 708b2b77a04SHong Zhang break; 709b2b77a04SHong Zhang default: 7106882372aSHong Zhang /* Get local size */ 7113c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 712b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 713b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 714b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 715b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 716b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 717b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 718b3a17365SAmlan Barua if (fin) { 719b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 720b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 721b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 722b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 723b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 724b3a17365SAmlan Barua } 725b3a17365SAmlan Barua if (fout) { 726b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 727b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 728b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 729b3a17365SAmlan Barua } 730b3a17365SAmlan Barua 7313c3a9c75SAmlan Barua #else 732c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 73311902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 73411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 73511902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 73611902ff2SHong Zhang // for(i=0;i<ndim;i++) 73711902ff2SHong Zhang // { 73811902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 73911902ff2SHong Zhang // } 74011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 74111902ff2SHong Zhang // printf("The quantity n is %d",n); 7426882372aSHong Zhang if (fin) { 7436882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7446882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 7456882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 7466882372aSHong Zhang } 7476882372aSHong Zhang if (fout) { 7486882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 7496882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 7506882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 7516882372aSHong Zhang } 7523c3a9c75SAmlan Barua #endif 753b2b77a04SHong Zhang break; 754b2b77a04SHong Zhang } 755b2b77a04SHong Zhang } 75654dd5118SAmlan Barua // if (fin){ 75754dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 75854dd5118SAmlan Barua // } 75954dd5118SAmlan Barua // if (fout){ 76054dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 76154dd5118SAmlan Barua // } 762b2b77a04SHong Zhang PetscFunctionReturn(0); 763b2b77a04SHong Zhang } 764b2b77a04SHong Zhang 765b2b77a04SHong Zhang #undef __FUNCT__ 7663c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 7673c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 7683c3a9c75SAmlan Barua { 7693c3a9c75SAmlan Barua PetscErrorCode ierr; 7703c3a9c75SAmlan Barua PetscFunctionBegin; 7713c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7723c3a9c75SAmlan Barua PetscFunctionReturn(0); 7733c3a9c75SAmlan Barua } 77454efbe56SHong Zhang 775b2b77a04SHong Zhang /* 7763c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 7773c3a9c75SAmlan Barua Input A, x, y 7783c3a9c75SAmlan Barua A - FFTW matrix 77954dd5118SAmlan Barua x - user data 780b2b77a04SHong Zhang Options Database Keys: 781b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 782b2b77a04SHong Zhang 783b2b77a04SHong Zhang Level: intermediate 784b2b77a04SHong Zhang 785b2b77a04SHong Zhang */ 7863c3a9c75SAmlan Barua 7873c3a9c75SAmlan Barua EXTERN_C_BEGIN 7883c3a9c75SAmlan Barua #undef __FUNCT__ 7893c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 7903c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 7913c3a9c75SAmlan Barua { 7923c3a9c75SAmlan Barua PetscErrorCode ierr; 7933c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 7943c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7953c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7963c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 797b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 798b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 799e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 8003c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 801e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 8023c3a9c75SAmlan Barua VecScatter vecscat; 8033c3a9c75SAmlan Barua IS list1,list2; 8043c3a9c75SAmlan Barua 805f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 806f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 8073c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 808f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 8093c3a9c75SAmlan Barua 810e81bb599SAmlan Barua if (size==1) 811e81bb599SAmlan Barua { 8127536937bSAmlan Barua /* switch (ndim){ 813e81bb599SAmlan Barua case 1: 814e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 815e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 816e81bb599SAmlan Barua { 817e81bb599SAmlan Barua indx1[i] = i; 818e81bb599SAmlan Barua } 819e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 820e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 821e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 824b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 825b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 826e81bb599SAmlan Barua break; 827e81bb599SAmlan Barua 828e81bb599SAmlan Barua case 2: 829e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 830e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 831e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 832e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 833e81bb599SAmlan Barua } 834e81bb599SAmlan Barua } 835e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 836e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 837e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 840b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 841b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 842e81bb599SAmlan Barua break; 843e81bb599SAmlan Barua case 3: 844e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 845e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 846e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 847e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 848e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 849e81bb599SAmlan Barua } 850e81bb599SAmlan Barua } 851e81bb599SAmlan Barua } 852e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 853e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 854e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 857b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 858b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 859e81bb599SAmlan Barua break; 860e81bb599SAmlan Barua default: 8617536937bSAmlan Barua */ 8626971673cSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 8636971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 8646971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8656971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8666971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 867b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 8686971673cSAmlan Barua //ierr = ISDestroy(list1);CHKERRQ(ierr); 8697536937bSAmlan Barua // break; 8707536937bSAmlan Barua // } 871e81bb599SAmlan Barua } 872e81bb599SAmlan Barua 873e81bb599SAmlan Barua else{ 874e81bb599SAmlan Barua 8753c3a9c75SAmlan Barua switch (ndim){ 8763c3a9c75SAmlan Barua case 1: 87764657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87864657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 87964657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 88064657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 88164657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 88264657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 88364657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 88464657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88564657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 88664657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 88764657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 88864657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 88964657d84SAmlan Barua break; 89064657d84SAmlan Barua #else 891e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 89264657d84SAmlan Barua #endif 8933c3a9c75SAmlan Barua break; 8943c3a9c75SAmlan Barua case 2: 895bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 896bd59e6c6SAmlan Barua //PetscInt my_value; 897bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 898bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 899bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 900bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 901bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 902bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 903bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 906bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 907bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 908bd59e6c6SAmlan Barua break; 909bd59e6c6SAmlan Barua #else 9103c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9113c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9123c3a9c75SAmlan Barua 9135b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9145b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9155b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 9163c3a9c75SAmlan Barua 9173c3a9c75SAmlan Barua if (dim[1]%2==0) 9183c3a9c75SAmlan Barua NM = dim[1]+2; 9193c3a9c75SAmlan Barua else 9203c3a9c75SAmlan Barua NM = dim[1]+1; 9213c3a9c75SAmlan Barua 9223c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 9233c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 9243c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 9253c3a9c75SAmlan Barua tempindx1 = i*NM + j; 9265b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9273c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 9285b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 9295b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 9305b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 9315b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 9323c3a9c75SAmlan Barua } 9333c3a9c75SAmlan Barua } 9343c3a9c75SAmlan Barua 9353c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9363c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9373c3a9c75SAmlan Barua 938f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 939f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 942b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 943b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 944b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 945b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 9463c3a9c75SAmlan Barua break; 947bd59e6c6SAmlan Barua #endif 9483c3a9c75SAmlan Barua 9493c3a9c75SAmlan Barua case 3: 950bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 951bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 952bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 953bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 954bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 955bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 956bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 957bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 960bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 961bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 962bd59e6c6SAmlan Barua break; 963bd59e6c6SAmlan Barua #else 96451d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 96551d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 96651d1eed7SAmlan Barua 96751d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 96851d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 96951d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 97051d1eed7SAmlan Barua 97151d1eed7SAmlan Barua if (dim[2]%2==0) 97251d1eed7SAmlan Barua NM = dim[2]+2; 97351d1eed7SAmlan Barua else 97451d1eed7SAmlan Barua NM = dim[2]+1; 97551d1eed7SAmlan Barua 97651d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 97751d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 97851d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 97951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 98051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 98151d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 98251d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 98351d1eed7SAmlan Barua } 98451d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 98551d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 98651d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 98751d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 98851d1eed7SAmlan Barua } 98951d1eed7SAmlan Barua } 99051d1eed7SAmlan Barua 99151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 99251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 99351d1eed7SAmlan Barua 99451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 99551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 99651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 99751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 998b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 999b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1000b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1001b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10023c3a9c75SAmlan Barua break; 1003bd59e6c6SAmlan Barua #endif 10043c3a9c75SAmlan Barua 10053c3a9c75SAmlan Barua default: 1006bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1007bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1008bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1009bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1010bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1011bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1012bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1013bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1015bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1016bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1017bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1018bd59e6c6SAmlan Barua 1019bd59e6c6SAmlan Barua 1020bd59e6c6SAmlan Barua #else 1021e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1022e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 1023e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1024e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1025e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1026e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1027e5d7f247SAmlan Barua 1028e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 1029e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1030e5d7f247SAmlan Barua 1031e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1032e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1033e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1034e5d7f247SAmlan Barua 1035e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 1036ba6e06d0SAmlan Barua NM = 2; 1037e5d7f247SAmlan Barua else 1038ba6e06d0SAmlan Barua NM = 1; 1039e5d7f247SAmlan Barua 10406971673cSAmlan Barua j = low; 10416971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 10426971673cSAmlan Barua { 10436971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 10446971673cSAmlan Barua indx2[i] = j; 1045ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 10466971673cSAmlan Barua if (k%dim[ndim-1]==0) 10476971673cSAmlan Barua { j+=NM;} 10486971673cSAmlan Barua j++; 10496971673cSAmlan Barua } 10506971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10516971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10526971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 10536971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10546971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10556971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1059b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10603c3a9c75SAmlan Barua break; 1061bd59e6c6SAmlan Barua #endif 10623c3a9c75SAmlan Barua } 1063e81bb599SAmlan Barua } 10643c3a9c75SAmlan Barua 10653c3a9c75SAmlan Barua return 0; 10663c3a9c75SAmlan Barua } 10673c3a9c75SAmlan Barua EXTERN_C_END 10683c3a9c75SAmlan Barua 10693c3a9c75SAmlan Barua #undef __FUNCT__ 10703c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 10713c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 10723c3a9c75SAmlan Barua { 10733c3a9c75SAmlan Barua PetscErrorCode ierr; 10743c3a9c75SAmlan Barua PetscFunctionBegin; 10753c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 10763c3a9c75SAmlan Barua PetscFunctionReturn(0); 10773c3a9c75SAmlan Barua } 107854efbe56SHong Zhang 10795b3e41ffSAmlan Barua /* 10805b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 10815b3e41ffSAmlan Barua Input A, x, y 10825b3e41ffSAmlan Barua A - FFTW matrix 10835b3e41ffSAmlan Barua x - FFTW vector 10845b3e41ffSAmlan Barua y - PETSc vector that user can read 10855b3e41ffSAmlan Barua Options Database Keys: 10865b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10875b3e41ffSAmlan Barua 10885b3e41ffSAmlan Barua Level: intermediate 10895b3e41ffSAmlan Barua 10903c3a9c75SAmlan Barua */ 10913c3a9c75SAmlan Barua 10923c3a9c75SAmlan Barua EXTERN_C_BEGIN 10933c3a9c75SAmlan Barua #undef __FUNCT__ 10945b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 10955b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 10965b3e41ffSAmlan Barua { 10975b3e41ffSAmlan Barua PetscErrorCode ierr; 10985b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 10995b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 11005b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 11015b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 1102b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 1103b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 1104ba6e06d0SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 11055b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 1106ba6e06d0SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 11075b3e41ffSAmlan Barua VecScatter vecscat; 11085b3e41ffSAmlan Barua IS list1,list2; 11095b3e41ffSAmlan Barua 11105b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 11115b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1112cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 11135b3e41ffSAmlan Barua 1114e81bb599SAmlan Barua if (size==1){ 11157536937bSAmlan Barua /* 11165b3e41ffSAmlan Barua switch (ndim){ 11175b3e41ffSAmlan Barua case 1: 1118e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1119e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 1120e81bb599SAmlan Barua { 1121e81bb599SAmlan Barua indx1[i] = i; 1122e81bb599SAmlan Barua } 1123e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1124e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1125e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1127e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1128b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1129b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1130e81bb599SAmlan Barua break; 1131e81bb599SAmlan Barua 1132e81bb599SAmlan Barua case 2: 1133e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1134e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1135e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1136e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 1137e81bb599SAmlan Barua } 1138e81bb599SAmlan Barua } 1139e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1140e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1141e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1143e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1144b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1145b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1146e81bb599SAmlan Barua break; 1147e81bb599SAmlan Barua case 3: 1148e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1149e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 1150e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 1151e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 1152e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1153e81bb599SAmlan Barua } 1154e81bb599SAmlan Barua } 1155e81bb599SAmlan Barua } 1156e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1157e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1158e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1161b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1162b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1163e81bb599SAmlan Barua break; 1164e81bb599SAmlan Barua default: 11657536937bSAmlan Barua */ 11666971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 11676971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 11686971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 11696971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11706971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 11716971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1172b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 11737536937bSAmlan Barua // break; 11747536937bSAmlan Barua // } 1175e81bb599SAmlan Barua } 1176e81bb599SAmlan Barua else{ 1177e81bb599SAmlan Barua 1178e81bb599SAmlan Barua switch (ndim){ 1179e81bb599SAmlan Barua case 1: 118064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 118164657d84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 118264657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 118364657d84SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 118464657d84SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 118564657d84SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 118664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 118764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 118864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 118964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 119064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 119164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 119264657d84SAmlan Barua break; 119364657d84SAmlan Barua 119464657d84SAmlan Barua #else 11956a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 119664657d84SAmlan Barua #endif 11975b3e41ffSAmlan Barua break; 11985b3e41ffSAmlan Barua case 2: 1199bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1200bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1201bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1202bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1203bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1204bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1205bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1207bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1208bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1209bd59e6c6SAmlan Barua break; 1210bd59e6c6SAmlan Barua #else 12115b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 12125b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 12135b3e41ffSAmlan Barua 12145b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 12155b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 12165b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 12175b3e41ffSAmlan Barua 12185b3e41ffSAmlan Barua if (dim[1]%2==0) 12195b3e41ffSAmlan Barua NM = dim[1]+2; 12205b3e41ffSAmlan Barua else 12215b3e41ffSAmlan Barua NM = dim[1]+1; 12225b3e41ffSAmlan Barua 12235b3e41ffSAmlan Barua 12245b3e41ffSAmlan Barua 12255b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 12265b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 12275b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 12285b3e41ffSAmlan Barua tempindx1 = i*NM + j; 12295b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 12305b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1231cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1232cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1233cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1234cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 12355b3e41ffSAmlan Barua } 12365b3e41ffSAmlan Barua } 12375b3e41ffSAmlan Barua 12385b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 12395b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 12405b3e41ffSAmlan Barua 12415b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 12425b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12435b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 12445b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1245b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1246b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1247b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1248b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 12495b3e41ffSAmlan Barua break; 1250bd59e6c6SAmlan Barua #endif 12515b3e41ffSAmlan Barua case 3: 1252bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1253bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1254bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1255bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1256bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1257bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1258bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1259bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1260bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1261bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1262bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1263bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1264bd59e6c6SAmlan Barua break; 1265bd59e6c6SAmlan Barua #else 1266bd59e6c6SAmlan Barua 126751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 126851d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 126951d1eed7SAmlan Barua 127051d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 127151d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 127251d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 127351d1eed7SAmlan Barua 127451d1eed7SAmlan Barua if (dim[2]%2==0) 127551d1eed7SAmlan Barua NM = dim[2]+2; 127651d1eed7SAmlan Barua else 127751d1eed7SAmlan Barua NM = dim[2]+1; 127851d1eed7SAmlan Barua 127951d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 128051d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 128151d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 128251d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 128351d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 128451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 128551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 128651d1eed7SAmlan Barua } 128751d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 128851d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 128951d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 129051d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 129151d1eed7SAmlan Barua } 129251d1eed7SAmlan Barua } 129351d1eed7SAmlan Barua 129451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 129551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 129651d1eed7SAmlan Barua 129751d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 129851d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 129951d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 130051d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1301b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1302b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1303b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1304b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 13055b3e41ffSAmlan Barua break; 1306bd59e6c6SAmlan Barua #endif 13075b3e41ffSAmlan Barua default: 1308bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1309bd59e6c6SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1310bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1311bd59e6c6SAmlan Barua ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1312bd59e6c6SAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1313bd59e6c6SAmlan Barua //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1314bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1315bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1316bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1317bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1318bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1319bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1320bd59e6c6SAmlan Barua #else 1321ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1322ba6e06d0SAmlan Barua printf("The value of temp is %ld\n",temp); 1323ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1324ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1325ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1326ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1327ba6e06d0SAmlan Barua 1328ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1329ba6e06d0SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1330ba6e06d0SAmlan Barua 1331ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1332ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1333ba6e06d0SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1334ba6e06d0SAmlan Barua 1335ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1336ba6e06d0SAmlan Barua NM = 2; 1337ba6e06d0SAmlan Barua else 1338ba6e06d0SAmlan Barua NM = 1; 1339ba6e06d0SAmlan Barua 1340ba6e06d0SAmlan Barua j = low; 1341ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1342ba6e06d0SAmlan Barua { 1343ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1344ba6e06d0SAmlan Barua indx2[i] = j; 1345ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1346ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1347ba6e06d0SAmlan Barua { j+=NM;} 1348ba6e06d0SAmlan Barua j++; 1349ba6e06d0SAmlan Barua } 1350ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1351ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1352ba6e06d0SAmlan Barua 1353ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1354ba6e06d0SAmlan Barua 1355ba6e06d0SAmlan Barua 1356ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1357ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1358ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1359ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1360b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1361b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1362b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1363b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1364ba6e06d0SAmlan Barua 13655b3e41ffSAmlan Barua break; 1366bd59e6c6SAmlan Barua #endif 13675b3e41ffSAmlan Barua } 1368e81bb599SAmlan Barua } 13695b3e41ffSAmlan Barua return 0; 13705b3e41ffSAmlan Barua } 13715b3e41ffSAmlan Barua EXTERN_C_END 13725b3e41ffSAmlan Barua 13735b3e41ffSAmlan Barua EXTERN_C_BEGIN 13745b3e41ffSAmlan Barua #undef __FUNCT__ 13753c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 13763c3a9c75SAmlan Barua /* 13773c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 13783c3a9c75SAmlan Barua via the external package FFTW 13793c3a9c75SAmlan Barua Options Database Keys: 13803c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 13813c3a9c75SAmlan Barua 13823c3a9c75SAmlan Barua Level: intermediate 13833c3a9c75SAmlan Barua 13843c3a9c75SAmlan Barua */ 13853c3a9c75SAmlan Barua 1386b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1387b2b77a04SHong Zhang { 1388b2b77a04SHong Zhang PetscErrorCode ierr; 1389b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1390b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1391b2b77a04SHong Zhang Mat_FFTW *fftw; 1392b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1393b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1394b2b77a04SHong Zhang PetscBool flg; 1395b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 139611902ff2SHong Zhang PetscMPIInt size,rank; 1397b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 13984a16a297SSean Farley ptrdiff_t local_n1,local_1_start,local_1_end; 1399b2b77a04SHong Zhang 1400b2b77a04SHong Zhang PetscFunctionBegin; 1401b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 140211902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 140354dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1404c92418ccSAmlan Barua 140511902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 140611902ff2SHong Zhang pdim[0] = dim[0]; 140711902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 140811902ff2SHong Zhang { 14096882372aSHong Zhang partial_dim *= dim[ctr]; 141011902ff2SHong Zhang pdim[ctr] = dim[ctr]; 14116882372aSHong Zhang } 14123c3a9c75SAmlan Barua 1413b2b77a04SHong Zhang if (size == 1) { 1414e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1415b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1416b2b77a04SHong Zhang n = N; 1417e81bb599SAmlan Barua #else 1418e81bb599SAmlan Barua int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1419e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1420e81bb599SAmlan Barua n = tot_dim; 1421e81bb599SAmlan Barua #endif 1422e81bb599SAmlan Barua 1423b2b77a04SHong Zhang } else { 1424b223cc97SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1425b2b77a04SHong Zhang switch (ndim){ 1426b2b77a04SHong Zhang case 1: 14273c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 14283c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1429e5d7f247SAmlan Barua #else 14306882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 14316882372aSHong Zhang n = (PetscInt)local_n0; 14326882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1433e5d7f247SAmlan Barua #endif 14343c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1435b2b77a04SHong Zhang break; 1436b2b77a04SHong Zhang case 2: 14375b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1438b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1439b2b77a04SHong Zhang /* 1440b2b77a04SHong Zhang PetscMPIInt rank; 1441b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1442b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1443b2b77a04SHong Zhang */ 1444b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1445b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 14465b3e41ffSAmlan Barua #else 14475b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 14485b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 14495b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 14505b3e41ffSAmlan Barua #endif 1451b2b77a04SHong Zhang break; 1452b2b77a04SHong Zhang case 3: 145311902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 145451d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 145551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 14566882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 14576882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 145851d1eed7SAmlan Barua #else 145951d1eed7SAmlan Barua printf("The code comes here\n"); 146051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 146151d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 146251d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 146351d1eed7SAmlan Barua #endif 1464b2b77a04SHong Zhang break; 1465b2b77a04SHong Zhang default: 1466b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 146711902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1468c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 146911902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 14706882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 147111902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 14726882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1473b3a17365SAmlan Barua #else 1474b3a17365SAmlan Barua temp = pdim[ndim-1]; 1475b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 1476b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1477b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1478b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1479b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1480b3a17365SAmlan Barua pdim[ndim-1] = temp; 1481b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1482b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1483b3a17365SAmlan Barua #endif 1484b2b77a04SHong Zhang break; 1485b2b77a04SHong Zhang } 1486b2b77a04SHong Zhang } 1487b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1488b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1489b2b77a04SHong Zhang fft->data = (void*)fftw; 1490b2b77a04SHong Zhang 1491b2b77a04SHong Zhang fft->n = n; 1492c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1493e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1494c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1495b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1496c92418ccSAmlan Barua 1497b2b77a04SHong Zhang fftw->p_forward = 0; 1498b2b77a04SHong Zhang fftw->p_backward = 0; 1499b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1500b2b77a04SHong Zhang 1501b2b77a04SHong Zhang if (size == 1){ 1502b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1503b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1504b2b77a04SHong Zhang } else { 1505b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1506b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1507b2b77a04SHong Zhang } 1508b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 15096a506ed8SAmlan Barua // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1510b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 1511b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 1512*4f7415efSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW); 15133c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 15145b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1515b2b77a04SHong Zhang 1516b2b77a04SHong Zhang /* get runtime options */ 1517b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1518b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1519b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1520b2b77a04SHong Zhang PetscOptionsEnd(); 1521b2b77a04SHong Zhang PetscFunctionReturn(0); 1522b2b77a04SHong Zhang } 1523b2b77a04SHong Zhang EXTERN_C_END 15243c3a9c75SAmlan Barua 15253c3a9c75SAmlan Barua 15263c3a9c75SAmlan Barua 15273c3a9c75SAmlan Barua 1528