1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 4058a912c5SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 41b9ae062cSHong Zhang 4258a912c5SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 4358a912c5SAmlan Barua //#endif 44b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 46b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 47b2b77a04SHong Zhang switch (ndim){ 48b2b77a04SHong Zhang case 1: 4958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 50b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5158a912c5SAmlan Barua #else 5258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 5358a912c5SAmlan Barua #endif 54b2b77a04SHong Zhang break; 55b2b77a04SHong Zhang case 2: 5658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5858a912c5SAmlan Barua #else 5958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6058a912c5SAmlan Barua #endif 61b2b77a04SHong Zhang break; 62b2b77a04SHong Zhang case 3: 6358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 64b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6558a912c5SAmlan Barua #else 6658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 6758a912c5SAmlan Barua #endif 68b2b77a04SHong Zhang break; 69b2b77a04SHong Zhang default: 7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 71b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7258a912c5SAmlan Barua #else 7358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 7458a912c5SAmlan Barua #endif 75b2b77a04SHong Zhang break; 76b2b77a04SHong Zhang } 77b2b77a04SHong Zhang fftw->finarray = x_array; 78b2b77a04SHong Zhang fftw->foutarray = y_array; 79b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 80b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 81b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 82b2b77a04SHong Zhang } else { /* use existing plan */ 83b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 84b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 85b2b77a04SHong Zhang } else { 86b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 87b2b77a04SHong Zhang } 88b2b77a04SHong Zhang } 89b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 90b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 91b2b77a04SHong Zhang PetscFunctionReturn(0); 92b2b77a04SHong Zhang } 93b2b77a04SHong Zhang 94b2b77a04SHong Zhang #undef __FUNCT__ 95b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 96b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 97b2b77a04SHong Zhang { 98b2b77a04SHong Zhang PetscErrorCode ierr; 99b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 100b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 101b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 102b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 103b2b77a04SHong Zhang 104b2b77a04SHong Zhang PetscFunctionBegin; 105e81bb599SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 106e81bb599SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 107e81bb599SAmlan Barua //#endif 108b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 109b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 110b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 111b2b77a04SHong Zhang switch (ndim){ 112b2b77a04SHong Zhang case 1: 11358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 114b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 11558a912c5SAmlan Barua #else 116e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 11758a912c5SAmlan Barua #endif 118b2b77a04SHong Zhang break; 119b2b77a04SHong Zhang case 2: 12058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12258a912c5SAmlan Barua #else 123e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 12458a912c5SAmlan Barua #endif 125b2b77a04SHong Zhang break; 126b2b77a04SHong Zhang case 3: 12758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 128b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 12958a912c5SAmlan Barua #else 130e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13158a912c5SAmlan Barua #endif 132b2b77a04SHong Zhang break; 133b2b77a04SHong Zhang default: 13458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 135b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13658a912c5SAmlan Barua #else 137e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 13858a912c5SAmlan Barua #endif 139b2b77a04SHong Zhang break; 140b2b77a04SHong Zhang } 141b2b77a04SHong Zhang fftw->binarray = x_array; 142b2b77a04SHong Zhang fftw->boutarray = y_array; 143b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 144b2b77a04SHong Zhang } else { /* use existing plan */ 145b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 146b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 147b2b77a04SHong Zhang } else { 148b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 149b2b77a04SHong Zhang } 150b2b77a04SHong Zhang } 151b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 152b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 153b2b77a04SHong Zhang PetscFunctionReturn(0); 154b2b77a04SHong Zhang } 155b2b77a04SHong Zhang 156b2b77a04SHong Zhang #undef __FUNCT__ 157b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 158b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 159b2b77a04SHong Zhang { 160b2b77a04SHong Zhang PetscErrorCode ierr; 161b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 162b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 163b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 164c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 165b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 166c92418ccSAmlan Barua // PetscInt ctr; 167c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 168c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 169c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 170c92418ccSAmlan Barua 171c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 172c92418ccSAmlan Barua // { 173c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 174c92418ccSAmlan Barua // } 175b2b77a04SHong Zhang 176b2b77a04SHong Zhang PetscFunctionBegin; 1775b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 1785b3e41ffSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 1795b3e41ffSAmlan Barua //#endif 180c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 181c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 18211902ff2SHong Zhang 183b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 184b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 185b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 186b2b77a04SHong Zhang switch (ndim){ 187b2b77a04SHong Zhang case 1: 1883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 189b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1903c3a9c75SAmlan Barua #endif 191b2b77a04SHong Zhang break; 192b2b77a04SHong Zhang case 2: 1933c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 194b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1953c3a9c75SAmlan Barua #else 1965b3e41ffSAmlan Barua printf("The code comes here \n"); 1973c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1983c3a9c75SAmlan Barua #endif 199b2b77a04SHong Zhang break; 200b2b77a04SHong Zhang case 3: 2013c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 202b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2033c3a9c75SAmlan Barua #else 20451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2053c3a9c75SAmlan Barua #endif 206b2b77a04SHong Zhang break; 207b2b77a04SHong Zhang default: 2083c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 209c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2103c3a9c75SAmlan Barua #else 2113c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2123c3a9c75SAmlan Barua #endif 21311902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 214b2b77a04SHong Zhang break; 215b2b77a04SHong Zhang } 216b2b77a04SHong Zhang fftw->finarray = x_array; 217b2b77a04SHong Zhang fftw->foutarray = y_array; 218b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 219b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 220b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 221b2b77a04SHong Zhang } else { /* use existing plan */ 222b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 223b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 224b2b77a04SHong Zhang } else { 225b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 226b2b77a04SHong Zhang } 227b2b77a04SHong Zhang } 228b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 229b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 230b2b77a04SHong Zhang PetscFunctionReturn(0); 231b2b77a04SHong Zhang } 232b2b77a04SHong Zhang 233b2b77a04SHong Zhang #undef __FUNCT__ 234b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 235b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 236b2b77a04SHong Zhang { 237b2b77a04SHong Zhang PetscErrorCode ierr; 238b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 239b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 240b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 241c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 242b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 243c92418ccSAmlan Barua // PetscInt ctr; 244c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 245c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 246c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 247c92418ccSAmlan Barua 248c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 249c92418ccSAmlan Barua // { 250c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 251c92418ccSAmlan Barua // } 252b2b77a04SHong Zhang 253b2b77a04SHong Zhang PetscFunctionBegin; 2543c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 2553c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 2563c3a9c75SAmlan Barua //#endif 257c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 258c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 259c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 26011902ff2SHong Zhang 261b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 262b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 263b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 264b2b77a04SHong Zhang switch (ndim){ 265b2b77a04SHong Zhang case 1: 2663c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 267b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2683c3a9c75SAmlan Barua #endif 269b2b77a04SHong Zhang break; 270b2b77a04SHong Zhang case 2: 2713c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 272b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2733c3a9c75SAmlan Barua #else 2743c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2753c3a9c75SAmlan Barua #endif 276b2b77a04SHong Zhang break; 277b2b77a04SHong Zhang case 3: 2783c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 279b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2803c3a9c75SAmlan Barua #else 2813c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2823c3a9c75SAmlan Barua #endif 283b2b77a04SHong Zhang break; 284b2b77a04SHong Zhang default: 2853c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 286c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2873c3a9c75SAmlan Barua #else 2883c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2893c3a9c75SAmlan Barua #endif 290c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 291b2b77a04SHong Zhang break; 292b2b77a04SHong Zhang } 293b2b77a04SHong Zhang fftw->binarray = x_array; 294b2b77a04SHong Zhang fftw->boutarray = y_array; 295b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296b2b77a04SHong Zhang } else { /* use existing plan */ 297b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 298b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 299b2b77a04SHong Zhang } else { 300b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang } 303b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 304b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 305b2b77a04SHong Zhang PetscFunctionReturn(0); 306b2b77a04SHong Zhang } 307b2b77a04SHong Zhang 308b2b77a04SHong Zhang #undef __FUNCT__ 309b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 310b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 311b2b77a04SHong Zhang { 312b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 313b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 314b2b77a04SHong Zhang PetscErrorCode ierr; 315b2b77a04SHong Zhang 316b2b77a04SHong Zhang PetscFunctionBegin; 317*b223cc97SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 318*b223cc97SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 319*b223cc97SAmlan Barua //#endif 320b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 321b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 322b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 323bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 324b2b77a04SHong Zhang PetscFunctionReturn(0); 325b2b77a04SHong Zhang } 326b2b77a04SHong Zhang 327c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 328b2b77a04SHong Zhang #undef __FUNCT__ 329b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 330b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 331b2b77a04SHong Zhang { 332b2b77a04SHong Zhang PetscErrorCode ierr; 333b2b77a04SHong Zhang PetscScalar *array; 334b2b77a04SHong Zhang 335b2b77a04SHong Zhang PetscFunctionBegin; 336b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 337b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 338b2b77a04SHong Zhang #endif 339b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 340b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 341b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 342b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 343b2b77a04SHong Zhang PetscFunctionReturn(0); 344b2b77a04SHong Zhang } 345b2b77a04SHong Zhang 346b2b77a04SHong Zhang #undef __FUNCT__ 3473c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 348c92418ccSAmlan Barua /* 349c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 350c92418ccSAmlan Barua parallel layout determined by FFTW-1D 351c92418ccSAmlan Barua 352c92418ccSAmlan Barua */ 353c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 354c92418ccSAmlan Barua { 355c92418ccSAmlan Barua PetscErrorCode ierr; 356c92418ccSAmlan Barua PetscMPIInt size,rank; 357c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 358c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 359c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360c92418ccSAmlan Barua PetscInt N=fft->N; 361c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 362c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 363c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 364c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 365c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 366c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 367c92418ccSAmlan Barua 368c92418ccSAmlan Barua PetscFunctionBegin; 369c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 370c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 371c92418ccSAmlan Barua #endif 372c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 373c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 374c92418ccSAmlan Barua if (size == 1){ 375c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 376c92418ccSAmlan Barua } 377c92418ccSAmlan Barua else { 378c92418ccSAmlan Barua if (ndim>1){ 379c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 380c92418ccSAmlan Barua else { 381c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 382c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 383c92418ccSAmlan Barua if (fin) { 384c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 385c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 386c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 387c92418ccSAmlan Barua } 388c92418ccSAmlan Barua if (fout) { 389c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 390c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 391c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 392c92418ccSAmlan Barua } 393c92418ccSAmlan Barua if (bin) { 394c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 395c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 396c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 397c92418ccSAmlan Barua } 398c92418ccSAmlan Barua if (bout) { 399c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 400c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 401c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 402c92418ccSAmlan Barua } 403c92418ccSAmlan Barua } 404c92418ccSAmlan Barua if (fin){ 405c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 406c92418ccSAmlan Barua } 407c92418ccSAmlan Barua if (fout){ 408c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 409c92418ccSAmlan Barua } 410c92418ccSAmlan Barua if (bin){ 411c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 412c92418ccSAmlan Barua } 413c92418ccSAmlan Barua if (bout){ 414c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 415c92418ccSAmlan Barua } 416c92418ccSAmlan Barua PetscFunctionReturn(0); 417c92418ccSAmlan Barua } 418c92418ccSAmlan Barua 419c92418ccSAmlan Barua 420c92418ccSAmlan Barua } 4213c3a9c75SAmlan Barua 422c92418ccSAmlan Barua #undef __FUNCT__ 423b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 424b2b77a04SHong Zhang /* 425b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 426b2b77a04SHong Zhang parallel layout determined by FFTW 427b2b77a04SHong Zhang 428b2b77a04SHong Zhang Collective on Mat 429b2b77a04SHong Zhang 430b2b77a04SHong Zhang Input Parameter: 431b2b77a04SHong Zhang . mat - the matrix 432b2b77a04SHong Zhang 433b2b77a04SHong Zhang Output Parameter: 434b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 435b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 436b2b77a04SHong Zhang 437b2b77a04SHong Zhang Level: advanced 438b2b77a04SHong Zhang 439b2b77a04SHong Zhang .seealso: MatCreateFFTW() 440b2b77a04SHong Zhang */ 441b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 442b2b77a04SHong Zhang { 443b2b77a04SHong Zhang PetscErrorCode ierr; 444b2b77a04SHong Zhang PetscMPIInt size,rank; 445b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 446b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 447c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4483c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 449e81bb599SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 450b2b77a04SHong Zhang 451b2b77a04SHong Zhang PetscFunctionBegin; 4523c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 4533c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 4543c3a9c75SAmlan Barua //#endif 455b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 456b2b77a04SHong Zhang PetscValidType(A,1); 457b2b77a04SHong Zhang 458b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 459b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 460b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 461e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 462b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 463b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 464e5d7f247SAmlan Barua #else 465e81bb599SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 466e81bb599SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 4673c3a9c75SAmlan Barua printf("The code successfully comes at the end of the routine with one processor\n"); 468e81bb599SAmlan Barua #endif 469b2b77a04SHong Zhang } else { /* mpi case */ 470b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4716882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 472b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 47351d1eed7SAmlan Barua double *data_finr ; 474b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 475c92418ccSAmlan Barua // PetscInt ctr; 476c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 477c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 478c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 47911902ff2SHong Zhang 480c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 481f76f76beSAmlan Barua // {k 482c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 483c92418ccSAmlan Barua // } 484b2b77a04SHong Zhang 485f76f76beSAmlan Barua 486f76f76beSAmlan Barua 487b2b77a04SHong Zhang switch (ndim){ 488b2b77a04SHong Zhang case 1: 4896882372aSHong Zhang /* Get local size */ 490e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 491c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4926882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4936882372aSHong Zhang if (fin) { 4946882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4956882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4966882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4976882372aSHong Zhang } 4986882372aSHong Zhang if (fout) { 4996882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5006882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5016882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5026882372aSHong Zhang } 503b2b77a04SHong Zhang break; 504b2b77a04SHong Zhang case 2: 5053c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5063c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5073c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5083c3a9c75SAmlan Barua if (fin) { 5093c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 51054dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5113c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 51209dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 5133c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5143c3a9c75SAmlan Barua } 5153c3a9c75SAmlan Barua if (fout) { 51609dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 51709dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5183c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5193c3a9c75SAmlan Barua } 520f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 5213c3a9c75SAmlan Barua 5223c3a9c75SAmlan Barua #else 523b2b77a04SHong Zhang /* Get local size */ 52409dd8a53SAmlan Barua printf("Hope this does not come here"); 525b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 526b2b77a04SHong Zhang if (fin) { 527b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 529b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 530b2b77a04SHong Zhang } 531b2b77a04SHong Zhang if (fout) { 532b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 534b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 535b2b77a04SHong Zhang } 5363c3a9c75SAmlan Barua printf("Hope this does not come here"); 5373c3a9c75SAmlan Barua #endif 538b2b77a04SHong Zhang break; 539b2b77a04SHong Zhang case 3: 5406882372aSHong Zhang /* Get local size */ 5413c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 54251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 54351d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 54451d1eed7SAmlan Barua if (fin) { 54551d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 54651d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 54751d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 54851d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 54951d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 55051d1eed7SAmlan Barua } 55151d1eed7SAmlan Barua if (fout) { 55251d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 55351d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55451d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55551d1eed7SAmlan Barua } 55651d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 55751d1eed7SAmlan Barua 55851d1eed7SAmlan Barua 5593c3a9c75SAmlan Barua #else 5606882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 56111902ff2SHong Zhang // printf("The quantity n is %d",n); 5626882372aSHong Zhang if (fin) { 5636882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5646882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5656882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5666882372aSHong Zhang } 5676882372aSHong Zhang if (fout) { 5686882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5696882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5706882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5716882372aSHong Zhang } 5723c3a9c75SAmlan Barua #endif 573b2b77a04SHong Zhang break; 574b2b77a04SHong Zhang default: 5756882372aSHong Zhang /* Get local size */ 5763c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 577b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 578b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 579b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 580b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 581b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 582b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 583b3a17365SAmlan Barua if (fin) { 584b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 585b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 586b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 587b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 588b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 589b3a17365SAmlan Barua } 590b3a17365SAmlan Barua if (fout) { 591b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 592b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 593b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 594b3a17365SAmlan Barua } 595b3a17365SAmlan Barua 5963c3a9c75SAmlan Barua #else 597c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 59811902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 59911902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 60011902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 60111902ff2SHong Zhang // for(i=0;i<ndim;i++) 60211902ff2SHong Zhang // { 60311902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 60411902ff2SHong Zhang // } 60511902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 60611902ff2SHong Zhang // printf("The quantity n is %d",n); 6076882372aSHong Zhang if (fin) { 6086882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6096882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6106882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6116882372aSHong Zhang } 6126882372aSHong Zhang if (fout) { 6136882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6146882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6156882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6166882372aSHong Zhang } 6173c3a9c75SAmlan Barua #endif 618b2b77a04SHong Zhang break; 619b2b77a04SHong Zhang } 620b2b77a04SHong Zhang } 62154dd5118SAmlan Barua // if (fin){ 62254dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 62354dd5118SAmlan Barua // } 62454dd5118SAmlan Barua // if (fout){ 62554dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 62654dd5118SAmlan Barua // } 627b2b77a04SHong Zhang PetscFunctionReturn(0); 628b2b77a04SHong Zhang } 629b2b77a04SHong Zhang 630b2b77a04SHong Zhang #undef __FUNCT__ 6313c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 6323c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 6333c3a9c75SAmlan Barua { 6343c3a9c75SAmlan Barua PetscErrorCode ierr; 6353c3a9c75SAmlan Barua PetscFunctionBegin; 6363c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6373c3a9c75SAmlan Barua PetscFunctionReturn(0); 6383c3a9c75SAmlan Barua } 63954efbe56SHong Zhang 640b2b77a04SHong Zhang /* 6413c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 6423c3a9c75SAmlan Barua Input A, x, y 6433c3a9c75SAmlan Barua A - FFTW matrix 64454dd5118SAmlan Barua x - user data 645b2b77a04SHong Zhang Options Database Keys: 646b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 647b2b77a04SHong Zhang 648b2b77a04SHong Zhang Level: intermediate 649b2b77a04SHong Zhang 650b2b77a04SHong Zhang */ 6513c3a9c75SAmlan Barua 6523c3a9c75SAmlan Barua EXTERN_C_BEGIN 6533c3a9c75SAmlan Barua #undef __FUNCT__ 6543c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 6553c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 6563c3a9c75SAmlan Barua { 6573c3a9c75SAmlan Barua PetscErrorCode ierr; 6583c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6593c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6603c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6613c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 662*b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 663*b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 664e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 6653c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 666e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 6673c3a9c75SAmlan Barua VecScatter vecscat; 6683c3a9c75SAmlan Barua IS list1,list2; 6693c3a9c75SAmlan Barua 670f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 671f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6723c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 673f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 6743c3a9c75SAmlan Barua 675e81bb599SAmlan Barua if (size==1) 676e81bb599SAmlan Barua { 677e81bb599SAmlan Barua switch (ndim){ 678e81bb599SAmlan Barua case 1: 679e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 680e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 681e81bb599SAmlan Barua { 682e81bb599SAmlan Barua indx1[i] = i; 683e81bb599SAmlan Barua } 684e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 685e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 686e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 687e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 688e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 689*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 690*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 691e81bb599SAmlan Barua break; 692e81bb599SAmlan Barua 693e81bb599SAmlan Barua case 2: 694e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 695e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 696e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 697e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 698e81bb599SAmlan Barua } 699e81bb599SAmlan Barua } 700e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 701e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 702e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 703e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 704e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 705*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 706*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 707e81bb599SAmlan Barua break; 708e81bb599SAmlan Barua case 3: 709e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 710e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 711e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 712e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 713e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 714e81bb599SAmlan Barua } 715e81bb599SAmlan Barua } 716e81bb599SAmlan Barua } 717e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 718e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 719e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 721e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 722*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 723*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 724e81bb599SAmlan Barua break; 725e81bb599SAmlan Barua default: 7266971673cSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 7276971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7286971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7296971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7306971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 731*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7326971673cSAmlan Barua //ierr = ISDestroy(list1);CHKERRQ(ierr); 733e81bb599SAmlan Barua break; 734e81bb599SAmlan Barua } 735e81bb599SAmlan Barua } 736e81bb599SAmlan Barua 737e81bb599SAmlan Barua else{ 738e81bb599SAmlan Barua 7393c3a9c75SAmlan Barua switch (ndim){ 7403c3a9c75SAmlan Barua case 1: 741e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 7423c3a9c75SAmlan Barua break; 7433c3a9c75SAmlan Barua case 2: 7443c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 7453c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7463c3a9c75SAmlan Barua 7475b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 7485b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 7495b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 7503c3a9c75SAmlan Barua 7513c3a9c75SAmlan Barua if (dim[1]%2==0) 7523c3a9c75SAmlan Barua NM = dim[1]+2; 7533c3a9c75SAmlan Barua else 7543c3a9c75SAmlan Barua NM = dim[1]+1; 7553c3a9c75SAmlan Barua 7563c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 7573c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 7583c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7593c3a9c75SAmlan Barua tempindx1 = i*NM + j; 7605b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7613c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7625b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 7635b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 7645b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 7655b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 7663c3a9c75SAmlan Barua } 7673c3a9c75SAmlan Barua } 7683c3a9c75SAmlan Barua 7693c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7703c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7713c3a9c75SAmlan Barua 772f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 773f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 776*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 777*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 778*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 779*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 7803c3a9c75SAmlan Barua break; 7813c3a9c75SAmlan Barua 7823c3a9c75SAmlan Barua case 3: 78351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 78451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 78551d1eed7SAmlan Barua 78651d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 78751d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 78851d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 78951d1eed7SAmlan Barua 79051d1eed7SAmlan Barua if (dim[2]%2==0) 79151d1eed7SAmlan Barua NM = dim[2]+2; 79251d1eed7SAmlan Barua else 79351d1eed7SAmlan Barua NM = dim[2]+1; 79451d1eed7SAmlan Barua 79551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 79651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 79751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 79851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 79951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 80051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 80151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 80251d1eed7SAmlan Barua } 80351d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 80451d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 80551d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 80651d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 80751d1eed7SAmlan Barua } 80851d1eed7SAmlan Barua } 80951d1eed7SAmlan Barua 81051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 81151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 81251d1eed7SAmlan Barua 81351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 81451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 817*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 818*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 819*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 820*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 8213c3a9c75SAmlan Barua break; 8223c3a9c75SAmlan Barua 8233c3a9c75SAmlan Barua default: 824e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 825e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 826e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 827e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 828e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 829e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 830e5d7f247SAmlan Barua 831e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 832e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 833e5d7f247SAmlan Barua 834e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 835e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 836e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 837e5d7f247SAmlan Barua 838e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 839ba6e06d0SAmlan Barua NM = 2; 840e5d7f247SAmlan Barua else 841ba6e06d0SAmlan Barua NM = 1; 842e5d7f247SAmlan Barua 8436971673cSAmlan Barua j = low; 8446971673cSAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 8456971673cSAmlan Barua { 8466971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8476971673cSAmlan Barua indx2[i] = j; 848ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 8496971673cSAmlan Barua if (k%dim[ndim-1]==0) 8506971673cSAmlan Barua { j+=NM;} 8516971673cSAmlan Barua j++; 8526971673cSAmlan Barua } 8536971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8546971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8556971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8566971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8576971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8586971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 859*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 860*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 861*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 862*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 8633c3a9c75SAmlan Barua break; 8643c3a9c75SAmlan Barua } 865e81bb599SAmlan Barua } 8663c3a9c75SAmlan Barua 8673c3a9c75SAmlan Barua return 0; 8683c3a9c75SAmlan Barua } 8693c3a9c75SAmlan Barua EXTERN_C_END 8703c3a9c75SAmlan Barua 8713c3a9c75SAmlan Barua #undef __FUNCT__ 8723c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 8733c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 8743c3a9c75SAmlan Barua { 8753c3a9c75SAmlan Barua PetscErrorCode ierr; 8763c3a9c75SAmlan Barua PetscFunctionBegin; 8773c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8783c3a9c75SAmlan Barua PetscFunctionReturn(0); 8793c3a9c75SAmlan Barua } 88054efbe56SHong Zhang 8815b3e41ffSAmlan Barua /* 8825b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 8835b3e41ffSAmlan Barua Input A, x, y 8845b3e41ffSAmlan Barua A - FFTW matrix 8855b3e41ffSAmlan Barua x - FFTW vector 8865b3e41ffSAmlan Barua y - PETSc vector that user can read 8875b3e41ffSAmlan Barua Options Database Keys: 8885b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 8895b3e41ffSAmlan Barua 8905b3e41ffSAmlan Barua Level: intermediate 8915b3e41ffSAmlan Barua 8923c3a9c75SAmlan Barua */ 8933c3a9c75SAmlan Barua 8943c3a9c75SAmlan Barua EXTERN_C_BEGIN 8953c3a9c75SAmlan Barua #undef __FUNCT__ 8965b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 8975b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 8985b3e41ffSAmlan Barua { 8995b3e41ffSAmlan Barua PetscErrorCode ierr; 9005b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 9015b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9025b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9035b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 904*b223cc97SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 905*b223cc97SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 906ba6e06d0SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 9075b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 908ba6e06d0SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 9095b3e41ffSAmlan Barua VecScatter vecscat; 9105b3e41ffSAmlan Barua IS list1,list2; 9115b3e41ffSAmlan Barua 9125b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9135b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 914cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 9155b3e41ffSAmlan Barua printf("Local ownership starts at %d\n",low); 9165b3e41ffSAmlan Barua 917e81bb599SAmlan Barua if (size==1){ 9185b3e41ffSAmlan Barua switch (ndim){ 9195b3e41ffSAmlan Barua case 1: 920e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 921e81bb599SAmlan Barua for (i=0;i<dim[0];i++) 922e81bb599SAmlan Barua { 923e81bb599SAmlan Barua indx1[i] = i; 924e81bb599SAmlan Barua } 925e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 926e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 927e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 930*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 931*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 932e81bb599SAmlan Barua break; 933e81bb599SAmlan Barua 934e81bb599SAmlan Barua case 2: 935e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 936e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 937e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 938e81bb599SAmlan Barua indx1[i*dim[1]+j] = i*dim[1] + j; 939e81bb599SAmlan Barua } 940e81bb599SAmlan Barua } 941e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 942e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 943e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 946*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 947*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 948e81bb599SAmlan Barua break; 949e81bb599SAmlan Barua case 3: 950e81bb599SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 951e81bb599SAmlan Barua for (i=0;i<dim[0];i++){ 952e81bb599SAmlan Barua for (j=0;j<dim[1];j++){ 953e81bb599SAmlan Barua for (k=0;k<dim[2];k++){ 954e81bb599SAmlan Barua indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 955e81bb599SAmlan Barua } 956e81bb599SAmlan Barua } 957e81bb599SAmlan Barua } 958e81bb599SAmlan Barua ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 959e81bb599SAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 960e81bb599SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961e81bb599SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962e81bb599SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 963*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 964*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 965e81bb599SAmlan Barua break; 966e81bb599SAmlan Barua default: 9676971673cSAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1); 9686971673cSAmlan Barua //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 9696971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9706971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9716971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9726971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 973*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 974e81bb599SAmlan Barua break; 975e81bb599SAmlan Barua } 976e81bb599SAmlan Barua } 977e81bb599SAmlan Barua else{ 978e81bb599SAmlan Barua 979e81bb599SAmlan Barua switch (ndim){ 980e81bb599SAmlan Barua case 1: 981e81bb599SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support yet"); 9825b3e41ffSAmlan Barua break; 9835b3e41ffSAmlan Barua case 2: 9845b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 9855b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9865b3e41ffSAmlan Barua 9875b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 9885b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 9895b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 9905b3e41ffSAmlan Barua 9915b3e41ffSAmlan Barua if (dim[1]%2==0) 9925b3e41ffSAmlan Barua NM = dim[1]+2; 9935b3e41ffSAmlan Barua else 9945b3e41ffSAmlan Barua NM = dim[1]+1; 9955b3e41ffSAmlan Barua 9965b3e41ffSAmlan Barua 9975b3e41ffSAmlan Barua 9985b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 9995b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 10005b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10015b3e41ffSAmlan Barua tempindx1 = i*NM + j; 10025b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10035b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 1004cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 1005cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1006cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1007cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 10085b3e41ffSAmlan Barua } 10095b3e41ffSAmlan Barua } 10105b3e41ffSAmlan Barua 10115b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10125b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10135b3e41ffSAmlan Barua 10145b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10155b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10165b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10175b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1018*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1019*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1020*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1021*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10225b3e41ffSAmlan Barua break; 10235b3e41ffSAmlan Barua 10245b3e41ffSAmlan Barua case 3: 102551d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 102651d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 102751d1eed7SAmlan Barua 102851d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 102951d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 103051d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 103151d1eed7SAmlan Barua 103251d1eed7SAmlan Barua if (dim[2]%2==0) 103351d1eed7SAmlan Barua NM = dim[2]+2; 103451d1eed7SAmlan Barua else 103551d1eed7SAmlan Barua NM = dim[2]+1; 103651d1eed7SAmlan Barua 103751d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 103851d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 103951d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 104051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 104151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 104251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 104351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 104451d1eed7SAmlan Barua } 104551d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 104651d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 104751d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 104851d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 104951d1eed7SAmlan Barua } 105051d1eed7SAmlan Barua } 105151d1eed7SAmlan Barua 105251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 105351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 105451d1eed7SAmlan Barua 105551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 105651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1062*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 10635b3e41ffSAmlan Barua break; 10645b3e41ffSAmlan Barua 10655b3e41ffSAmlan Barua default: 1066ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1067ba6e06d0SAmlan Barua printf("The value of temp is %ld\n",temp); 1068ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1069ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1070ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1071ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1072ba6e06d0SAmlan Barua 1073ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1074ba6e06d0SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 1075ba6e06d0SAmlan Barua 1076ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1077ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1078ba6e06d0SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 1079ba6e06d0SAmlan Barua 1080ba6e06d0SAmlan Barua if (dim[ndim-1]%2==0) 1081ba6e06d0SAmlan Barua NM = 2; 1082ba6e06d0SAmlan Barua else 1083ba6e06d0SAmlan Barua NM = 1; 1084ba6e06d0SAmlan Barua 1085ba6e06d0SAmlan Barua j = low; 1086ba6e06d0SAmlan Barua for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1087ba6e06d0SAmlan Barua { 1088ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1089ba6e06d0SAmlan Barua indx2[i] = j; 1090ba6e06d0SAmlan Barua //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1091ba6e06d0SAmlan Barua if (k%dim[ndim-1]==0) 1092ba6e06d0SAmlan Barua { j+=NM;} 1093ba6e06d0SAmlan Barua j++; 1094ba6e06d0SAmlan Barua } 1095ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1096ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1097ba6e06d0SAmlan Barua 1098ba6e06d0SAmlan Barua //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1099ba6e06d0SAmlan Barua 1100ba6e06d0SAmlan Barua 1101ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1102ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1103ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1104ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1105*b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1106*b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1107*b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1108*b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1109ba6e06d0SAmlan Barua 11105b3e41ffSAmlan Barua break; 11115b3e41ffSAmlan Barua } 1112e81bb599SAmlan Barua } 11135b3e41ffSAmlan Barua return 0; 11145b3e41ffSAmlan Barua } 11155b3e41ffSAmlan Barua EXTERN_C_END 11165b3e41ffSAmlan Barua 11175b3e41ffSAmlan Barua EXTERN_C_BEGIN 11185b3e41ffSAmlan Barua #undef __FUNCT__ 11193c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11203c3a9c75SAmlan Barua /* 11213c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 11223c3a9c75SAmlan Barua via the external package FFTW 11233c3a9c75SAmlan Barua Options Database Keys: 11243c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11253c3a9c75SAmlan Barua 11263c3a9c75SAmlan Barua Level: intermediate 11273c3a9c75SAmlan Barua 11283c3a9c75SAmlan Barua */ 11293c3a9c75SAmlan Barua 1130b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 1131b2b77a04SHong Zhang { 1132b2b77a04SHong Zhang PetscErrorCode ierr; 1133b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 1134b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1135b2b77a04SHong Zhang Mat_FFTW *fftw; 1136b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1137b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1138b2b77a04SHong Zhang PetscBool flg; 1139b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 114011902ff2SHong Zhang PetscMPIInt size,rank; 1141b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 11425b3e41ffSAmlan Barua ptrdiff_t local_n1,local_1_start; 1143b2b77a04SHong Zhang 1144b2b77a04SHong Zhang PetscFunctionBegin; 11453c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 11463c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 11473c3a9c75SAmlan Barua //#endif 1148b2b77a04SHong Zhang 1149b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 115011902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 115154dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 1152c92418ccSAmlan Barua 115311902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 115411902ff2SHong Zhang pdim[0] = dim[0]; 115511902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 115611902ff2SHong Zhang { 11576882372aSHong Zhang partial_dim *= dim[ctr]; 115811902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11596882372aSHong Zhang } 11603c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 11613c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 11623c3a9c75SAmlan Barua //#endif 11633c3a9c75SAmlan Barua 116411902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 1165b2b77a04SHong Zhang if (size == 1) { 1166e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1167b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1168b2b77a04SHong Zhang n = N; 1169e81bb599SAmlan Barua #else 1170e81bb599SAmlan Barua int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1171e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1172e81bb599SAmlan Barua n = tot_dim; 1173e81bb599SAmlan Barua #endif 1174e81bb599SAmlan Barua 1175b2b77a04SHong Zhang } else { 1176*b223cc97SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1177b2b77a04SHong Zhang switch (ndim){ 1178b2b77a04SHong Zhang case 1: 11793c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11803c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1181e5d7f247SAmlan Barua #else 11826882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 11836882372aSHong Zhang n = (PetscInt)local_n0; 11846882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1185e5d7f247SAmlan Barua #endif 11863c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1187b2b77a04SHong Zhang break; 1188b2b77a04SHong Zhang case 2: 11895b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1190b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1191b2b77a04SHong Zhang /* 1192b2b77a04SHong Zhang PetscMPIInt rank; 1193b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1194b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 1195b2b77a04SHong Zhang */ 1196b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1197b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11985b3e41ffSAmlan Barua #else 11995b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 12005b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 12015b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12025b3e41ffSAmlan Barua #endif 1203b2b77a04SHong Zhang break; 1204b2b77a04SHong Zhang case 3: 120511902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 120651d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 120751d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 12086882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12096882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 121051d1eed7SAmlan Barua #else 121151d1eed7SAmlan Barua printf("The code comes here\n"); 121251d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 121351d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 121451d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 121551d1eed7SAmlan Barua #endif 1216b2b77a04SHong Zhang break; 1217b2b77a04SHong Zhang default: 1218b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 121911902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1220c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 122111902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 12226882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 122311902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 12246882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1225b3a17365SAmlan Barua #else 1226b3a17365SAmlan Barua temp = pdim[ndim-1]; 1227b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 1228b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1229b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1230b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1231b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1232b3a17365SAmlan Barua pdim[ndim-1] = temp; 1233b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1234b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1235b3a17365SAmlan Barua #endif 1236b2b77a04SHong Zhang break; 1237b2b77a04SHong Zhang } 1238b2b77a04SHong Zhang } 1239b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1240b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1241b2b77a04SHong Zhang fft->data = (void*)fftw; 1242b2b77a04SHong Zhang 1243b2b77a04SHong Zhang fft->n = n; 1244c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1245e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1246c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1247b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1248c92418ccSAmlan Barua 1249b2b77a04SHong Zhang fftw->p_forward = 0; 1250b2b77a04SHong Zhang fftw->p_backward = 0; 1251b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1252b2b77a04SHong Zhang 1253b2b77a04SHong Zhang if (size == 1){ 1254b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1255b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1256b2b77a04SHong Zhang } else { 1257b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1258b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1259b2b77a04SHong Zhang } 1260b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1261b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 1262b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12633c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12643c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 12655b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 12663c3a9c75SAmlan Barua #endif 1267b2b77a04SHong Zhang 1268b2b77a04SHong Zhang /* get runtime options */ 1269b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1270b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1271b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1272b2b77a04SHong Zhang PetscOptionsEnd(); 1273b2b77a04SHong Zhang PetscFunctionReturn(0); 1274b2b77a04SHong Zhang } 1275b2b77a04SHong Zhang EXTERN_C_END 12763c3a9c75SAmlan Barua 12773c3a9c75SAmlan Barua 12783c3a9c75SAmlan Barua 12793c3a9c75SAmlan Barua 1280