1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14*e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28b2b77a04SHong Zhang 29b2b77a04SHong Zhang #undef __FUNCT__ 30b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 31b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32b2b77a04SHong Zhang { 33b2b77a04SHong Zhang PetscErrorCode ierr; 34b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 35b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 37b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 38b2b77a04SHong Zhang 39b2b77a04SHong Zhang PetscFunctionBegin; 40b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 41b9ae062cSHong Zhang 42b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 43b2b77a04SHong Zhang #endif 44b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 46b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 47b2b77a04SHong Zhang switch (ndim){ 48b2b77a04SHong Zhang case 1: 49b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 2: 52b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 53b2b77a04SHong Zhang break; 54b2b77a04SHong Zhang case 3: 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56b2b77a04SHong Zhang break; 57b2b77a04SHong Zhang default: 58b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 59b2b77a04SHong Zhang break; 60b2b77a04SHong Zhang } 61b2b77a04SHong Zhang fftw->finarray = x_array; 62b2b77a04SHong Zhang fftw->foutarray = y_array; 63b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 64b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 65b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 66b2b77a04SHong Zhang } else { /* use existing plan */ 67b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 68b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 69b2b77a04SHong Zhang } else { 70b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 71b2b77a04SHong Zhang } 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 74b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 75b2b77a04SHong Zhang PetscFunctionReturn(0); 76b2b77a04SHong Zhang } 77b2b77a04SHong Zhang 78b2b77a04SHong Zhang #undef __FUNCT__ 79b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 80b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 81b2b77a04SHong Zhang { 82b2b77a04SHong Zhang PetscErrorCode ierr; 83b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 84b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 85b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 86b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 87b2b77a04SHong Zhang 88b2b77a04SHong Zhang PetscFunctionBegin; 89b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 90b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 91b2b77a04SHong Zhang #endif 92b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 95b2b77a04SHong Zhang switch (ndim){ 96b2b77a04SHong Zhang case 1: 97b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 98b2b77a04SHong Zhang break; 99b2b77a04SHong Zhang case 2: 100b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 101b2b77a04SHong Zhang break; 102b2b77a04SHong Zhang case 3: 103b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 104b2b77a04SHong Zhang break; 105b2b77a04SHong Zhang default: 106b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 107b2b77a04SHong Zhang break; 108b2b77a04SHong Zhang } 109b2b77a04SHong Zhang fftw->binarray = x_array; 110b2b77a04SHong Zhang fftw->boutarray = y_array; 111b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 112b2b77a04SHong Zhang } else { /* use existing plan */ 113b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 114b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 115b2b77a04SHong Zhang } else { 116b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 117b2b77a04SHong Zhang } 118b2b77a04SHong Zhang } 119b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 121b2b77a04SHong Zhang PetscFunctionReturn(0); 122b2b77a04SHong Zhang } 123b2b77a04SHong Zhang 124b2b77a04SHong Zhang #undef __FUNCT__ 125b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 126b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 127b2b77a04SHong Zhang { 128b2b77a04SHong Zhang PetscErrorCode ierr; 129b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 130b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 131b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 132c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 133b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 134c92418ccSAmlan Barua // PetscInt ctr; 135c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 136c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 137c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 138c92418ccSAmlan Barua 139c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 140c92418ccSAmlan Barua // { 141c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 142c92418ccSAmlan Barua // } 143b2b77a04SHong Zhang 144b2b77a04SHong Zhang PetscFunctionBegin; 1455b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 1465b3e41ffSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 1475b3e41ffSAmlan Barua //#endif 148c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 149c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 15011902ff2SHong Zhang 151b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 152b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 153b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 154b2b77a04SHong Zhang switch (ndim){ 155b2b77a04SHong Zhang case 1: 1563c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 157b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1583c3a9c75SAmlan Barua #endif 159b2b77a04SHong Zhang break; 160b2b77a04SHong Zhang case 2: 1613c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 162b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1633c3a9c75SAmlan Barua #else 1645b3e41ffSAmlan Barua printf("The code comes here \n"); 1653c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1663c3a9c75SAmlan Barua #endif 167b2b77a04SHong Zhang break; 168b2b77a04SHong Zhang case 3: 1693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 170b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1713c3a9c75SAmlan Barua #else 17251d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1733c3a9c75SAmlan Barua #endif 174b2b77a04SHong Zhang break; 175b2b77a04SHong Zhang default: 1763c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 177c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1783c3a9c75SAmlan Barua #else 1793c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1803c3a9c75SAmlan Barua #endif 18111902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182b2b77a04SHong Zhang break; 183b2b77a04SHong Zhang } 184b2b77a04SHong Zhang fftw->finarray = x_array; 185b2b77a04SHong Zhang fftw->foutarray = y_array; 186b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 187b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 188b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 189b2b77a04SHong Zhang } else { /* use existing plan */ 190b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 191b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 192b2b77a04SHong Zhang } else { 193b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 194b2b77a04SHong Zhang } 195b2b77a04SHong Zhang } 196b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 197b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 198b2b77a04SHong Zhang PetscFunctionReturn(0); 199b2b77a04SHong Zhang } 200b2b77a04SHong Zhang 201b2b77a04SHong Zhang #undef __FUNCT__ 202b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 203b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 204b2b77a04SHong Zhang { 205b2b77a04SHong Zhang PetscErrorCode ierr; 206b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 207b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 208b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 209c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 210b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 211c92418ccSAmlan Barua // PetscInt ctr; 212c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 213c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 214c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 215c92418ccSAmlan Barua 216c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 217c92418ccSAmlan Barua // { 218c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 219c92418ccSAmlan Barua // } 220b2b77a04SHong Zhang 221b2b77a04SHong Zhang PetscFunctionBegin; 2223c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 2233c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 2243c3a9c75SAmlan Barua //#endif 225c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 226c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 227c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 22811902ff2SHong Zhang 229b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 230b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 231b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 232b2b77a04SHong Zhang switch (ndim){ 233b2b77a04SHong Zhang case 1: 2343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 235b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2363c3a9c75SAmlan Barua #endif 237b2b77a04SHong Zhang break; 238b2b77a04SHong Zhang case 2: 2393c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 240b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2413c3a9c75SAmlan Barua #else 2423c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2433c3a9c75SAmlan Barua #endif 244b2b77a04SHong Zhang break; 245b2b77a04SHong Zhang case 3: 2463c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 247b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2483c3a9c75SAmlan Barua #else 2493c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2503c3a9c75SAmlan Barua #endif 251b2b77a04SHong Zhang break; 252b2b77a04SHong Zhang default: 2533c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 254c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2553c3a9c75SAmlan Barua #else 2563c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2573c3a9c75SAmlan Barua #endif 258c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 259b2b77a04SHong Zhang break; 260b2b77a04SHong Zhang } 261b2b77a04SHong Zhang fftw->binarray = x_array; 262b2b77a04SHong Zhang fftw->boutarray = y_array; 263b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264b2b77a04SHong Zhang } else { /* use existing plan */ 265b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 266b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 267b2b77a04SHong Zhang } else { 268b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 269b2b77a04SHong Zhang } 270b2b77a04SHong Zhang } 271b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 272b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 273b2b77a04SHong Zhang PetscFunctionReturn(0); 274b2b77a04SHong Zhang } 275b2b77a04SHong Zhang 276b2b77a04SHong Zhang #undef __FUNCT__ 277b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 278b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 279b2b77a04SHong Zhang { 280b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 281b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 282b2b77a04SHong Zhang PetscErrorCode ierr; 283b2b77a04SHong Zhang 284b2b77a04SHong Zhang PetscFunctionBegin; 285b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 286b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 287b2b77a04SHong Zhang #endif 288b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 289b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 290b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 291bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 292b2b77a04SHong Zhang PetscFunctionReturn(0); 293b2b77a04SHong Zhang } 294b2b77a04SHong Zhang 295c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 296b2b77a04SHong Zhang #undef __FUNCT__ 297b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 298b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 299b2b77a04SHong Zhang { 300b2b77a04SHong Zhang PetscErrorCode ierr; 301b2b77a04SHong Zhang PetscScalar *array; 302b2b77a04SHong Zhang 303b2b77a04SHong Zhang PetscFunctionBegin; 304b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 305b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 306b2b77a04SHong Zhang #endif 307b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 308b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 309b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 310b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 311b2b77a04SHong Zhang PetscFunctionReturn(0); 312b2b77a04SHong Zhang } 313b2b77a04SHong Zhang 314b2b77a04SHong Zhang #undef __FUNCT__ 3153c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 316c92418ccSAmlan Barua /* 317c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 318c92418ccSAmlan Barua parallel layout determined by FFTW-1D 319c92418ccSAmlan Barua 320c92418ccSAmlan Barua */ 321c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 322c92418ccSAmlan Barua { 323c92418ccSAmlan Barua PetscErrorCode ierr; 324c92418ccSAmlan Barua PetscMPIInt size,rank; 325c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 326c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 327c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 328c92418ccSAmlan Barua PetscInt N=fft->N; 329c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 330c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 331c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 332c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 333c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 334c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 335c92418ccSAmlan Barua 336c92418ccSAmlan Barua PetscFunctionBegin; 337c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 338c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 339c92418ccSAmlan Barua #endif 340c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 341c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 342c92418ccSAmlan Barua if (size == 1){ 343c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua else { 346c92418ccSAmlan Barua if (ndim>1){ 347c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 348c92418ccSAmlan Barua else { 349c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 350c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 351c92418ccSAmlan Barua if (fin) { 352c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 353c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 354c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua if (fout) { 357c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 358c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 359c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 360c92418ccSAmlan Barua } 361c92418ccSAmlan Barua if (bin) { 362c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 363c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 364c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 365c92418ccSAmlan Barua } 366c92418ccSAmlan Barua if (bout) { 367c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 368c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 369c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 370c92418ccSAmlan Barua } 371c92418ccSAmlan Barua } 372c92418ccSAmlan Barua if (fin){ 373c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 374c92418ccSAmlan Barua } 375c92418ccSAmlan Barua if (fout){ 376c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 377c92418ccSAmlan Barua } 378c92418ccSAmlan Barua if (bin){ 379c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 380c92418ccSAmlan Barua } 381c92418ccSAmlan Barua if (bout){ 382c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 383c92418ccSAmlan Barua } 384c92418ccSAmlan Barua PetscFunctionReturn(0); 385c92418ccSAmlan Barua } 386c92418ccSAmlan Barua 387c92418ccSAmlan Barua 388c92418ccSAmlan Barua } 3893c3a9c75SAmlan Barua 390c92418ccSAmlan Barua #undef __FUNCT__ 391b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 392b2b77a04SHong Zhang /* 393b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 394b2b77a04SHong Zhang parallel layout determined by FFTW 395b2b77a04SHong Zhang 396b2b77a04SHong Zhang Collective on Mat 397b2b77a04SHong Zhang 398b2b77a04SHong Zhang Input Parameter: 399b2b77a04SHong Zhang . mat - the matrix 400b2b77a04SHong Zhang 401b2b77a04SHong Zhang Output Parameter: 402b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 403b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 404b2b77a04SHong Zhang 405b2b77a04SHong Zhang Level: advanced 406b2b77a04SHong Zhang 407b2b77a04SHong Zhang .seealso: MatCreateFFTW() 408b2b77a04SHong Zhang */ 409b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 410b2b77a04SHong Zhang { 411b2b77a04SHong Zhang PetscErrorCode ierr; 412b2b77a04SHong Zhang PetscMPIInt size,rank; 413b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 414b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 415c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4163c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 417b2b77a04SHong Zhang 418b2b77a04SHong Zhang PetscFunctionBegin; 4193c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 4203c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 4213c3a9c75SAmlan Barua //#endif 422b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 423b2b77a04SHong Zhang PetscValidType(A,1); 424b2b77a04SHong Zhang 425b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 426b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 427b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 428*e5d7f247SAmlan Barua #if defined(PETSC_USE_COMPLEX) 429b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 430b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 431*e5d7f247SAmlan Barua #else 432*e5d7f247SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 433*e5d7f247SAmlan Barua if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*(N/2+1),fout);CHKERRQ(ierr);} 434*e5d7f247SAmlan Barua #endif 4353c3a9c75SAmlan Barua printf("The code successfully comes at the end of the routine with one processor\n"); 436b2b77a04SHong Zhang } else { /* mpi case */ 437b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4386882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 439c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 440b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 44151d1eed7SAmlan Barua double *data_finr ; 442b3a17365SAmlan Barua ptrdiff_t local_1_start,temp; 443c92418ccSAmlan Barua // PetscInt ctr; 444c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 445c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 446c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 44711902ff2SHong Zhang 448c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 449f76f76beSAmlan Barua // {k 450c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 451c92418ccSAmlan Barua // } 452b2b77a04SHong Zhang 453f76f76beSAmlan Barua 454f76f76beSAmlan Barua 455b2b77a04SHong Zhang switch (ndim){ 456b2b77a04SHong Zhang case 1: 4576882372aSHong Zhang /* Get local size */ 458*e5d7f247SAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 459c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4606882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4616882372aSHong Zhang if (fin) { 4626882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4636882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4646882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4656882372aSHong Zhang } 4666882372aSHong Zhang if (fout) { 4676882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4686882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4696882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4706882372aSHong Zhang } 471b2b77a04SHong Zhang break; 472b2b77a04SHong Zhang case 2: 4733c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4743c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4753c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4763c3a9c75SAmlan Barua if (fin) { 4773c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 47854dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4793c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 48009dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4813c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4823c3a9c75SAmlan Barua } 4833c3a9c75SAmlan Barua if (fout) { 48409dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 48509dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4863c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4873c3a9c75SAmlan Barua } 488f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 4893c3a9c75SAmlan Barua 4903c3a9c75SAmlan Barua #else 491b2b77a04SHong Zhang /* Get local size */ 49209dd8a53SAmlan Barua printf("Hope this does not come here"); 493b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 494b2b77a04SHong Zhang if (fin) { 495b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 496b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 497b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 498b2b77a04SHong Zhang } 499b2b77a04SHong Zhang if (fout) { 500b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 501b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 502b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 503b2b77a04SHong Zhang } 5043c3a9c75SAmlan Barua printf("Hope this does not come here"); 5053c3a9c75SAmlan Barua #endif 506b2b77a04SHong Zhang break; 507b2b77a04SHong Zhang case 3: 5086882372aSHong Zhang /* Get local size */ 5093c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 51051d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 51151d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 51251d1eed7SAmlan Barua if (fin) { 51351d1eed7SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 51451d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 51551d1eed7SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 51651d1eed7SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 51751d1eed7SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 51851d1eed7SAmlan Barua } 51951d1eed7SAmlan Barua if (fout) { 52051d1eed7SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52151d1eed7SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52251d1eed7SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52351d1eed7SAmlan Barua } 52451d1eed7SAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 52551d1eed7SAmlan Barua 52651d1eed7SAmlan Barua 5273c3a9c75SAmlan Barua #else 5286882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 52911902ff2SHong Zhang // printf("The quantity n is %d",n); 5306882372aSHong Zhang if (fin) { 5316882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5326882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5336882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5346882372aSHong Zhang } 5356882372aSHong Zhang if (fout) { 5366882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5376882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5386882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5396882372aSHong Zhang } 5403c3a9c75SAmlan Barua #endif 541b2b77a04SHong Zhang break; 542b2b77a04SHong Zhang default: 5436882372aSHong Zhang /* Get local size */ 5443c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 545b3a17365SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 546b3a17365SAmlan Barua printf("The value of temp is %ld\n",temp); 547b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 548b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 549b3a17365SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 550b3a17365SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 551b3a17365SAmlan Barua if (fin) { 552b3a17365SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 553b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 554b3a17365SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 555b3a17365SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 556b3a17365SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 557b3a17365SAmlan Barua } 558b3a17365SAmlan Barua if (fout) { 559b3a17365SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560b3a17365SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 561b3a17365SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 562b3a17365SAmlan Barua } 563b3a17365SAmlan Barua 5643c3a9c75SAmlan Barua #else 565c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 56611902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 56711902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 56811902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 56911902ff2SHong Zhang // for(i=0;i<ndim;i++) 57011902ff2SHong Zhang // { 57111902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 57211902ff2SHong Zhang // } 57311902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 57411902ff2SHong Zhang // printf("The quantity n is %d",n); 5756882372aSHong Zhang if (fin) { 5766882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5776882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5786882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5796882372aSHong Zhang } 5806882372aSHong Zhang if (fout) { 5816882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5826882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5836882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5846882372aSHong Zhang } 5853c3a9c75SAmlan Barua #endif 586b2b77a04SHong Zhang break; 587b2b77a04SHong Zhang } 588b2b77a04SHong Zhang } 58954dd5118SAmlan Barua // if (fin){ 59054dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 59154dd5118SAmlan Barua // } 59254dd5118SAmlan Barua // if (fout){ 59354dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 59454dd5118SAmlan Barua // } 595b2b77a04SHong Zhang PetscFunctionReturn(0); 596b2b77a04SHong Zhang } 597b2b77a04SHong Zhang 598b2b77a04SHong Zhang #undef __FUNCT__ 5993c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 6003c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 6013c3a9c75SAmlan Barua { 6023c3a9c75SAmlan Barua PetscErrorCode ierr; 6033c3a9c75SAmlan Barua PetscFunctionBegin; 6043c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6053c3a9c75SAmlan Barua PetscFunctionReturn(0); 6063c3a9c75SAmlan Barua } 60754efbe56SHong Zhang 608b2b77a04SHong Zhang /* 6093c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 6103c3a9c75SAmlan Barua Input A, x, y 6113c3a9c75SAmlan Barua A - FFTW matrix 61254dd5118SAmlan Barua x - user data 613b2b77a04SHong Zhang Options Database Keys: 614b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 615b2b77a04SHong Zhang 616b2b77a04SHong Zhang Level: intermediate 617b2b77a04SHong Zhang 618b2b77a04SHong Zhang */ 6193c3a9c75SAmlan Barua 6203c3a9c75SAmlan Barua EXTERN_C_BEGIN 6213c3a9c75SAmlan Barua #undef __FUNCT__ 6223c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 6233c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 6243c3a9c75SAmlan Barua { 6253c3a9c75SAmlan Barua PetscErrorCode ierr; 6263c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 6273c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6283c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6293c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 6303c3a9c75SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 631f76f76beSAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 632*e5d7f247SAmlan Barua PetscInt i,j,k,rank,size,partial_dim; 6333c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 634*e5d7f247SAmlan Barua ptrdiff_t local_n1,local_1_start,temp; 6353c3a9c75SAmlan Barua VecScatter vecscat; 6363c3a9c75SAmlan Barua IS list1,list2; 6373c3a9c75SAmlan Barua 638f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 639f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6403c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 641f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 6423c3a9c75SAmlan Barua 6433c3a9c75SAmlan Barua switch (ndim){ 6443c3a9c75SAmlan Barua case 1: 645*e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 6463c3a9c75SAmlan Barua break; 6473c3a9c75SAmlan Barua case 2: 6483c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6493c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6503c3a9c75SAmlan Barua 6515b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 6525b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 6535b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 6543c3a9c75SAmlan Barua 6553c3a9c75SAmlan Barua if (dim[1]%2==0) 6563c3a9c75SAmlan Barua NM = dim[1]+2; 6573c3a9c75SAmlan Barua else 6583c3a9c75SAmlan Barua NM = dim[1]+1; 6593c3a9c75SAmlan Barua 6603c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 6613c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 6623c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 6633c3a9c75SAmlan Barua tempindx1 = i*NM + j; 6645b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 6653c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 6665b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 6675b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 6685b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 6695b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 6703c3a9c75SAmlan Barua } 6713c3a9c75SAmlan Barua } 6723c3a9c75SAmlan Barua 6733c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 6743c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 6753c3a9c75SAmlan Barua 676f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 677f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 6803c3a9c75SAmlan Barua break; 6813c3a9c75SAmlan Barua 6823c3a9c75SAmlan Barua case 3: 68351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 68451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 68551d1eed7SAmlan Barua 68651d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 68751d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 68851d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 68951d1eed7SAmlan Barua 69051d1eed7SAmlan Barua if (dim[2]%2==0) 69151d1eed7SAmlan Barua NM = dim[2]+2; 69251d1eed7SAmlan Barua else 69351d1eed7SAmlan Barua NM = dim[2]+1; 69451d1eed7SAmlan Barua 69551d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 69651d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 69751d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 69851d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 69951d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 70051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 70151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 70251d1eed7SAmlan Barua } 70351d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 70451d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 70551d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 70651d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 70751d1eed7SAmlan Barua } 70851d1eed7SAmlan Barua } 70951d1eed7SAmlan Barua 71051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 71151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 71251d1eed7SAmlan Barua 71351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 71451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 71551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 71651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 7173c3a9c75SAmlan Barua break; 7183c3a9c75SAmlan Barua 7193c3a9c75SAmlan Barua default: 720*e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 721*e5d7f247SAmlan Barua printf("The value of temp is %ld\n",temp); 722*e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 723*e5d7f247SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 724*e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 725*e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 726*e5d7f247SAmlan Barua 727*e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 728*e5d7f247SAmlan Barua printf("The value of partial dim is %d\n",partial_dim); 729*e5d7f247SAmlan Barua 730*e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 731*e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 732*e5d7f247SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 733*e5d7f247SAmlan Barua 734*e5d7f247SAmlan Barua if (dim[ndim-1]%2==0) 735*e5d7f247SAmlan Barua NM = dim[ndim-1]+2; 736*e5d7f247SAmlan Barua else 737*e5d7f247SAmlan Barua NM = dim[ndim-1]+1; 738*e5d7f247SAmlan Barua 739*e5d7f247SAmlan Barua 740*e5d7f247SAmlan Barua 7413c3a9c75SAmlan Barua break; 7423c3a9c75SAmlan Barua } 7433c3a9c75SAmlan Barua 7443c3a9c75SAmlan Barua return 0; 7453c3a9c75SAmlan Barua } 7463c3a9c75SAmlan Barua EXTERN_C_END 7473c3a9c75SAmlan Barua 7483c3a9c75SAmlan Barua #undef __FUNCT__ 7493c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 7503c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 7513c3a9c75SAmlan Barua { 7523c3a9c75SAmlan Barua PetscErrorCode ierr; 7533c3a9c75SAmlan Barua PetscFunctionBegin; 7543c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7553c3a9c75SAmlan Barua PetscFunctionReturn(0); 7563c3a9c75SAmlan Barua } 75754efbe56SHong Zhang 7585b3e41ffSAmlan Barua /* 7595b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 7605b3e41ffSAmlan Barua Input A, x, y 7615b3e41ffSAmlan Barua A - FFTW matrix 7625b3e41ffSAmlan Barua x - FFTW vector 7635b3e41ffSAmlan Barua y - PETSc vector that user can read 7645b3e41ffSAmlan Barua Options Database Keys: 7655b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 7665b3e41ffSAmlan Barua 7675b3e41ffSAmlan Barua Level: intermediate 7685b3e41ffSAmlan Barua 7693c3a9c75SAmlan Barua */ 7703c3a9c75SAmlan Barua 7713c3a9c75SAmlan Barua EXTERN_C_BEGIN 7723c3a9c75SAmlan Barua #undef __FUNCT__ 7735b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 7745b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 7755b3e41ffSAmlan Barua { 7765b3e41ffSAmlan Barua PetscErrorCode ierr; 7775b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 7785b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7795b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7805b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 7815b3e41ffSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 7825b3e41ffSAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 78351d1eed7SAmlan Barua PetscInt i,j,k,rank,size; 7845b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 7855b3e41ffSAmlan Barua ptrdiff_t local_n1,local_1_start; 7865b3e41ffSAmlan Barua VecScatter vecscat; 7875b3e41ffSAmlan Barua IS list1,list2; 7885b3e41ffSAmlan Barua 7895b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 7905b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 791cfe1ae98SAmlan Barua ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 7925b3e41ffSAmlan Barua printf("Local ownership starts at %d\n",low); 7935b3e41ffSAmlan Barua 7945b3e41ffSAmlan Barua switch (ndim){ 7955b3e41ffSAmlan Barua case 1: 796*e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 7975b3e41ffSAmlan Barua break; 7985b3e41ffSAmlan Barua case 2: 7995b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 8005b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 8015b3e41ffSAmlan Barua 8025b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 8035b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 8045b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 8055b3e41ffSAmlan Barua 8065b3e41ffSAmlan Barua if (dim[1]%2==0) 8075b3e41ffSAmlan Barua NM = dim[1]+2; 8085b3e41ffSAmlan Barua else 8095b3e41ffSAmlan Barua NM = dim[1]+1; 8105b3e41ffSAmlan Barua 8115b3e41ffSAmlan Barua 8125b3e41ffSAmlan Barua 8135b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 8145b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 8155b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 8165b3e41ffSAmlan Barua tempindx1 = i*NM + j; 8175b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8185b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 819cfe1ae98SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 820cfe1ae98SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 821cfe1ae98SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 822cfe1ae98SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 8235b3e41ffSAmlan Barua } 8245b3e41ffSAmlan Barua } 8255b3e41ffSAmlan Barua 8265b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8275b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8285b3e41ffSAmlan Barua 8295b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 8305b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8315b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8325b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 8335b3e41ffSAmlan Barua break; 8345b3e41ffSAmlan Barua 8355b3e41ffSAmlan Barua case 3: 83651d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 83751d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 83851d1eed7SAmlan Barua 83951d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 84051d1eed7SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 84151d1eed7SAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 84251d1eed7SAmlan Barua 84351d1eed7SAmlan Barua if (dim[2]%2==0) 84451d1eed7SAmlan Barua NM = dim[2]+2; 84551d1eed7SAmlan Barua else 84651d1eed7SAmlan Barua NM = dim[2]+1; 84751d1eed7SAmlan Barua 84851d1eed7SAmlan Barua for (i=0;i<local_n0;i++){ 84951d1eed7SAmlan Barua for (j=0;j<dim[1];j++){ 85051d1eed7SAmlan Barua for (k=0;k<dim[2];k++){ 85151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 85251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 85351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 85451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 85551d1eed7SAmlan Barua } 85651d1eed7SAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 85751d1eed7SAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 85851d1eed7SAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 85951d1eed7SAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 86051d1eed7SAmlan Barua } 86151d1eed7SAmlan Barua } 86251d1eed7SAmlan Barua 86351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 86451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 86551d1eed7SAmlan Barua 86651d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 86751d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 86851d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 86951d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 8705b3e41ffSAmlan Barua break; 8715b3e41ffSAmlan Barua 8725b3e41ffSAmlan Barua default: 8735b3e41ffSAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 8745b3e41ffSAmlan Barua break; 8755b3e41ffSAmlan Barua } 8765b3e41ffSAmlan Barua return 0; 8775b3e41ffSAmlan Barua } 8785b3e41ffSAmlan Barua EXTERN_C_END 8795b3e41ffSAmlan Barua 8805b3e41ffSAmlan Barua EXTERN_C_BEGIN 8815b3e41ffSAmlan Barua #undef __FUNCT__ 8823c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 8833c3a9c75SAmlan Barua /* 8843c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 8853c3a9c75SAmlan Barua via the external package FFTW 8863c3a9c75SAmlan Barua Options Database Keys: 8873c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 8883c3a9c75SAmlan Barua 8893c3a9c75SAmlan Barua Level: intermediate 8903c3a9c75SAmlan Barua 8913c3a9c75SAmlan Barua */ 8923c3a9c75SAmlan Barua 893b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 894b2b77a04SHong Zhang { 895b2b77a04SHong Zhang PetscErrorCode ierr; 896b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 897b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 898b2b77a04SHong Zhang Mat_FFTW *fftw; 899b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 900b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 901b2b77a04SHong Zhang PetscBool flg; 902b3a17365SAmlan Barua PetscInt p_flag,partial_dim=1,ctr,N1; 90311902ff2SHong Zhang PetscMPIInt size,rank; 904b3a17365SAmlan Barua ptrdiff_t *pdim, temp; 9055b3e41ffSAmlan Barua ptrdiff_t local_n1,local_1_start; 906b2b77a04SHong Zhang 907b2b77a04SHong Zhang PetscFunctionBegin; 9083c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 9093c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 9103c3a9c75SAmlan Barua //#endif 911b2b77a04SHong Zhang 912b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 91311902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 91454dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 915c92418ccSAmlan Barua 91611902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 91711902ff2SHong Zhang pdim[0] = dim[0]; 91811902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 91911902ff2SHong Zhang { 9206882372aSHong Zhang partial_dim *= dim[ctr]; 92111902ff2SHong Zhang pdim[ctr] = dim[ctr]; 9226882372aSHong Zhang } 9233c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 9243c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 9253c3a9c75SAmlan Barua //#endif 9263c3a9c75SAmlan Barua 92711902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 928b2b77a04SHong Zhang if (size == 1) { 929b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 930b2b77a04SHong Zhang n = N; 931b2b77a04SHong Zhang } else { 9326882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 933b2b77a04SHong Zhang switch (ndim){ 934b2b77a04SHong Zhang case 1: 9353c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9363c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 937*e5d7f247SAmlan Barua #else 9386882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 9396882372aSHong Zhang n = (PetscInt)local_n0; 9406882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 941*e5d7f247SAmlan Barua #endif 9423c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 943b2b77a04SHong Zhang break; 944b2b77a04SHong Zhang case 2: 9455b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 946b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 947b2b77a04SHong Zhang /* 948b2b77a04SHong Zhang PetscMPIInt rank; 949b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 950b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 951b2b77a04SHong Zhang */ 952b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 953b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 9545b3e41ffSAmlan Barua #else 9555b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 9565b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 9575b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 9585b3e41ffSAmlan Barua #endif 959b2b77a04SHong Zhang break; 960b2b77a04SHong Zhang case 3: 96111902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 96251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 96351d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 9646882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 9656882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 96651d1eed7SAmlan Barua #else 96751d1eed7SAmlan Barua printf("The code comes here\n"); 96851d1eed7SAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 96951d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 97051d1eed7SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 97151d1eed7SAmlan Barua #endif 972b2b77a04SHong Zhang break; 973b2b77a04SHong Zhang default: 974b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 97511902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 976c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 97711902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 9786882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 97911902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 9806882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 981b3a17365SAmlan Barua #else 982b3a17365SAmlan Barua temp = pdim[ndim-1]; 983b3a17365SAmlan Barua pdim[ndim-1]= temp/2 + 1; 984b3a17365SAmlan Barua printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 985b3a17365SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 986b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 987b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 988b3a17365SAmlan Barua pdim[ndim-1] = temp; 989b3a17365SAmlan Barua printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 990b3a17365SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 991b3a17365SAmlan Barua #endif 992b2b77a04SHong Zhang break; 993b2b77a04SHong Zhang } 994b2b77a04SHong Zhang } 995b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 996b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 997b2b77a04SHong Zhang fft->data = (void*)fftw; 998b2b77a04SHong Zhang 999b2b77a04SHong Zhang fft->n = n; 1000c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1001*e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 1002c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1003b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1004c92418ccSAmlan Barua 1005b2b77a04SHong Zhang fftw->p_forward = 0; 1006b2b77a04SHong Zhang fftw->p_backward = 0; 1007b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1008b2b77a04SHong Zhang 1009b2b77a04SHong Zhang if (size == 1){ 1010b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1011b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1012b2b77a04SHong Zhang } else { 1013b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1014b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1015b2b77a04SHong Zhang } 1016b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1017b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 1018b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 10193c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 10203c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 10215b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 10223c3a9c75SAmlan Barua #endif 1023b2b77a04SHong Zhang 1024b2b77a04SHong Zhang /* get runtime options */ 1025b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1026b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1027b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 1028b2b77a04SHong Zhang PetscOptionsEnd(); 1029b2b77a04SHong Zhang PetscFunctionReturn(0); 1030b2b77a04SHong Zhang } 1031b2b77a04SHong Zhang EXTERN_C_END 10323c3a9c75SAmlan Barua 10333c3a9c75SAmlan Barua 10343c3a9c75SAmlan Barua 10353c3a9c75SAmlan Barua 1036