1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18b2b77a04SHong Zhang } Mat_FFTW; 19b2b77a04SHong Zhang 20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27b2b77a04SHong Zhang 28b2b77a04SHong Zhang #undef __FUNCT__ 29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31b2b77a04SHong Zhang { 32b2b77a04SHong Zhang PetscErrorCode ierr; 33b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 34b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 36b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 37b2b77a04SHong Zhang 38b2b77a04SHong Zhang PetscFunctionBegin; 39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 40b9ae062cSHong Zhang 41b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42b2b77a04SHong Zhang #endif 43b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 46b2b77a04SHong Zhang switch (ndim){ 47b2b77a04SHong Zhang case 1: 48b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49b2b77a04SHong Zhang break; 50b2b77a04SHong Zhang case 2: 51b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52b2b77a04SHong Zhang break; 53b2b77a04SHong Zhang case 3: 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55b2b77a04SHong Zhang break; 56b2b77a04SHong Zhang default: 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang } 60b2b77a04SHong Zhang fftw->finarray = x_array; 61b2b77a04SHong Zhang fftw->foutarray = y_array; 62b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 65b2b77a04SHong Zhang } else { /* use existing plan */ 66b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68b2b77a04SHong Zhang } else { 69b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 70b2b77a04SHong Zhang } 71b2b77a04SHong Zhang } 72b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74b2b77a04SHong Zhang PetscFunctionReturn(0); 75b2b77a04SHong Zhang } 76b2b77a04SHong Zhang 77b2b77a04SHong Zhang #undef __FUNCT__ 78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80b2b77a04SHong Zhang { 81b2b77a04SHong Zhang PetscErrorCode ierr; 82b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 83b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 85b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 86b2b77a04SHong Zhang 87b2b77a04SHong Zhang PetscFunctionBegin; 88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 89b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90b2b77a04SHong Zhang #endif 91b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 94b2b77a04SHong Zhang switch (ndim){ 95b2b77a04SHong Zhang case 1: 96b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97b2b77a04SHong Zhang break; 98b2b77a04SHong Zhang case 2: 99b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100b2b77a04SHong Zhang break; 101b2b77a04SHong Zhang case 3: 102b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103b2b77a04SHong Zhang break; 104b2b77a04SHong Zhang default: 105b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106b2b77a04SHong Zhang break; 107b2b77a04SHong Zhang } 108b2b77a04SHong Zhang fftw->binarray = x_array; 109b2b77a04SHong Zhang fftw->boutarray = y_array; 110b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111b2b77a04SHong Zhang } else { /* use existing plan */ 112b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114b2b77a04SHong Zhang } else { 115b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116b2b77a04SHong Zhang } 117b2b77a04SHong Zhang } 118b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang PetscFunctionReturn(0); 121b2b77a04SHong Zhang } 122b2b77a04SHong Zhang 123b2b77a04SHong Zhang #undef __FUNCT__ 124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126b2b77a04SHong Zhang { 127b2b77a04SHong Zhang PetscErrorCode ierr; 128b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 129b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 131*c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 132b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 133*c92418ccSAmlan Barua // PetscInt ctr; 134*c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135*c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 136*c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137*c92418ccSAmlan Barua 138*c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 139*c92418ccSAmlan Barua // { 140*c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 141*c92418ccSAmlan Barua // } 142b2b77a04SHong Zhang 143b2b77a04SHong Zhang PetscFunctionBegin; 144b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 145b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146b2b77a04SHong Zhang #endif 147*c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148*c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 14911902ff2SHong Zhang 150b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 153b2b77a04SHong Zhang switch (ndim){ 154b2b77a04SHong Zhang case 1: 155b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 156b2b77a04SHong Zhang break; 157b2b77a04SHong Zhang case 2: 158b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 159b2b77a04SHong Zhang break; 160b2b77a04SHong Zhang case 3: 161b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162b2b77a04SHong Zhang break; 163b2b77a04SHong Zhang default: 164*c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 16511902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 166b2b77a04SHong Zhang break; 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang fftw->finarray = x_array; 169b2b77a04SHong Zhang fftw->foutarray = y_array; 170b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 171b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 172b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 173b2b77a04SHong Zhang } else { /* use existing plan */ 174b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 175b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 176b2b77a04SHong Zhang } else { 177b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 178b2b77a04SHong Zhang } 179b2b77a04SHong Zhang } 180b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 181b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 182b2b77a04SHong Zhang PetscFunctionReturn(0); 183b2b77a04SHong Zhang } 184b2b77a04SHong Zhang 185b2b77a04SHong Zhang #undef __FUNCT__ 186b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 187b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 188b2b77a04SHong Zhang { 189b2b77a04SHong Zhang PetscErrorCode ierr; 190b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 191b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 193*c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 194b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 195*c92418ccSAmlan Barua // PetscInt ctr; 196*c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 197*c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 198*c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 199*c92418ccSAmlan Barua 200*c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 201*c92418ccSAmlan Barua // { 202*c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 203*c92418ccSAmlan Barua // } 204b2b77a04SHong Zhang 205b2b77a04SHong Zhang PetscFunctionBegin; 206b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 207b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 208b2b77a04SHong Zhang #endif 209*c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 210*c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 211*c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 21211902ff2SHong Zhang 213b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 214b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 215b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 216b2b77a04SHong Zhang switch (ndim){ 217b2b77a04SHong Zhang case 1: 218b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 219b2b77a04SHong Zhang break; 220b2b77a04SHong Zhang case 2: 221b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 222b2b77a04SHong Zhang break; 223b2b77a04SHong Zhang case 3: 224b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 225b2b77a04SHong Zhang break; 226b2b77a04SHong Zhang default: 227*c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 228*c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 229b2b77a04SHong Zhang break; 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang fftw->binarray = x_array; 232b2b77a04SHong Zhang fftw->boutarray = y_array; 233b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 234b2b77a04SHong Zhang } else { /* use existing plan */ 235b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 236b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 237b2b77a04SHong Zhang } else { 238b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 239b2b77a04SHong Zhang } 240b2b77a04SHong Zhang } 241b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 242b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 243b2b77a04SHong Zhang PetscFunctionReturn(0); 244b2b77a04SHong Zhang } 245b2b77a04SHong Zhang 246b2b77a04SHong Zhang #undef __FUNCT__ 247b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 248b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 249b2b77a04SHong Zhang { 250b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 251b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252b2b77a04SHong Zhang PetscErrorCode ierr; 253b2b77a04SHong Zhang 254b2b77a04SHong Zhang PetscFunctionBegin; 255b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 256b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 257b2b77a04SHong Zhang #endif 258b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 259b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 260bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 261b2b77a04SHong Zhang PetscFunctionReturn(0); 262b2b77a04SHong Zhang } 263b2b77a04SHong Zhang 264c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 265b2b77a04SHong Zhang #undef __FUNCT__ 266b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 267b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 268b2b77a04SHong Zhang { 269b2b77a04SHong Zhang PetscErrorCode ierr; 270b2b77a04SHong Zhang PetscScalar *array; 271b2b77a04SHong Zhang 272b2b77a04SHong Zhang PetscFunctionBegin; 273b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 274b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 275b2b77a04SHong Zhang #endif 276b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 277b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 278b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 279b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 280b2b77a04SHong Zhang PetscFunctionReturn(0); 281b2b77a04SHong Zhang } 282b2b77a04SHong Zhang 283b2b77a04SHong Zhang #undef __FUNCT__ 284*c92418ccSAmlan Barua #define __FUNCT__ "MatGetVecs_FFTW1D" 285*c92418ccSAmlan Barua /* 286*c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 287*c92418ccSAmlan Barua parallel layout determined by FFTW-1D 288*c92418ccSAmlan Barua 289*c92418ccSAmlan Barua */ 290*c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 291*c92418ccSAmlan Barua { 292*c92418ccSAmlan Barua PetscErrorCode ierr; 293*c92418ccSAmlan Barua PetscMPIInt size,rank; 294*c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 295*c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 296*c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 297*c92418ccSAmlan Barua PetscInt N=fft->N; 298*c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 299*c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 300*c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 301*c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 302*c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 303*c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 304*c92418ccSAmlan Barua 305*c92418ccSAmlan Barua PetscFunctionBegin; 306*c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 307*c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 308*c92418ccSAmlan Barua #endif 309*c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 310*c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 311*c92418ccSAmlan Barua if (size == 1){ 312*c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 313*c92418ccSAmlan Barua } 314*c92418ccSAmlan Barua else { 315*c92418ccSAmlan Barua if (ndim>1){ 316*c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 317*c92418ccSAmlan Barua else { 318*c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 319*c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 320*c92418ccSAmlan Barua if (fin) { 321*c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 322*c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 323*c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 324*c92418ccSAmlan Barua } 325*c92418ccSAmlan Barua if (fout) { 326*c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 327*c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 328*c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 329*c92418ccSAmlan Barua } 330*c92418ccSAmlan Barua if (bin) { 331*c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 332*c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 333*c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 334*c92418ccSAmlan Barua } 335*c92418ccSAmlan Barua if (bout) { 336*c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 337*c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 338*c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 339*c92418ccSAmlan Barua } 340*c92418ccSAmlan Barua } 341*c92418ccSAmlan Barua if (fin){ 342*c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 343*c92418ccSAmlan Barua } 344*c92418ccSAmlan Barua if (fout){ 345*c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 346*c92418ccSAmlan Barua } 347*c92418ccSAmlan Barua if (bin){ 348*c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 349*c92418ccSAmlan Barua } 350*c92418ccSAmlan Barua if (bout){ 351*c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 352*c92418ccSAmlan Barua } 353*c92418ccSAmlan Barua PetscFunctionReturn(0); 354*c92418ccSAmlan Barua } 355*c92418ccSAmlan Barua 356*c92418ccSAmlan Barua 357*c92418ccSAmlan Barua 358*c92418ccSAmlan Barua 359*c92418ccSAmlan Barua 360*c92418ccSAmlan Barua 361*c92418ccSAmlan Barua 362*c92418ccSAmlan Barua } 363*c92418ccSAmlan Barua #undef __FUNCT__ 364b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 365b2b77a04SHong Zhang /* 366b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 367b2b77a04SHong Zhang parallel layout determined by FFTW 368b2b77a04SHong Zhang 369b2b77a04SHong Zhang Collective on Mat 370b2b77a04SHong Zhang 371b2b77a04SHong Zhang Input Parameter: 372b2b77a04SHong Zhang . mat - the matrix 373b2b77a04SHong Zhang 374b2b77a04SHong Zhang Output Parameter: 375b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 376b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 377b2b77a04SHong Zhang 378b2b77a04SHong Zhang Level: advanced 379b2b77a04SHong Zhang 380b2b77a04SHong Zhang .seealso: MatCreateFFTW() 381b2b77a04SHong Zhang */ 382b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 383b2b77a04SHong Zhang { 384b2b77a04SHong Zhang PetscErrorCode ierr; 385b2b77a04SHong Zhang PetscMPIInt size,rank; 386b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 387b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 388*c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 389b2b77a04SHong Zhang PetscInt N=fft->N; 390b2b77a04SHong Zhang 391b2b77a04SHong Zhang PetscFunctionBegin; 392b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 393b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 394b2b77a04SHong Zhang #endif 395b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 396b2b77a04SHong Zhang PetscValidType(A,1); 397b2b77a04SHong Zhang 398b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 399b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 400b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 401b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 402b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 403b2b77a04SHong Zhang } else { /* mpi case */ 404b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4056882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 406*c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 407b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 408*c92418ccSAmlan Barua // PetscInt ctr; 409*c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 410*c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 411*c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 41211902ff2SHong Zhang 413*c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 414*c92418ccSAmlan Barua // { 415*c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 416*c92418ccSAmlan Barua // } 417b2b77a04SHong Zhang 418b2b77a04SHong Zhang switch (ndim){ 419b2b77a04SHong Zhang case 1: 4206882372aSHong Zhang /* Get local size */ 421*c92418ccSAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 422*c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4236882372aSHong Zhang 4246882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4256882372aSHong Zhang if (fin) { 4266882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4276882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4286882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4296882372aSHong Zhang } 4306882372aSHong Zhang if (fout) { 4316882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4326882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4336882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4346882372aSHong Zhang } 435b2b77a04SHong Zhang break; 436b2b77a04SHong Zhang case 2: 437b2b77a04SHong Zhang /* Get local size */ 438b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 439b2b77a04SHong Zhang if (fin) { 440b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 441b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 442b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 443b2b77a04SHong Zhang } 444b2b77a04SHong Zhang if (fout) { 445b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 446b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 447b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 448b2b77a04SHong Zhang } 449b2b77a04SHong Zhang break; 450b2b77a04SHong Zhang case 3: 4516882372aSHong Zhang /* Get local size */ 4526882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 45311902ff2SHong Zhang // printf("The quantity n is %d",n); 4546882372aSHong Zhang if (fin) { 4556882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4566882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4576882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4586882372aSHong Zhang } 4596882372aSHong Zhang if (fout) { 4606882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4616882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4626882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4636882372aSHong Zhang } 464b2b77a04SHong Zhang break; 465b2b77a04SHong Zhang default: 4666882372aSHong Zhang /* Get local size */ 467*c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 46811902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 46911902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 47011902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 47111902ff2SHong Zhang // for(i=0;i<ndim;i++) 47211902ff2SHong Zhang // { 47311902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 47411902ff2SHong Zhang // } 47511902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 47611902ff2SHong Zhang // printf("The quantity n is %d",n); 4776882372aSHong Zhang if (fin) { 4786882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4796882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4806882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4816882372aSHong Zhang } 4826882372aSHong Zhang if (fout) { 4836882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4846882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4856882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4866882372aSHong Zhang } 487b2b77a04SHong Zhang break; 488b2b77a04SHong Zhang } 489b2b77a04SHong Zhang } 490fb7c10e0SHong Zhang if (fin){ 491262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 492fb7c10e0SHong Zhang } 493fb7c10e0SHong Zhang if (fout){ 494262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 495fb7c10e0SHong Zhang } 496b2b77a04SHong Zhang PetscFunctionReturn(0); 497b2b77a04SHong Zhang } 498b2b77a04SHong Zhang 499b2b77a04SHong Zhang EXTERN_C_BEGIN 500b2b77a04SHong Zhang #undef __FUNCT__ 501b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 502b2b77a04SHong Zhang /* 503b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 504b2b77a04SHong Zhang via the external package FFTW 505b2b77a04SHong Zhang 506b2b77a04SHong Zhang Options Database Keys: 507b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 508b2b77a04SHong Zhang 509b2b77a04SHong Zhang Level: intermediate 510b2b77a04SHong Zhang 511b2b77a04SHong Zhang */ 512b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 513b2b77a04SHong Zhang { 514b2b77a04SHong Zhang PetscErrorCode ierr; 515b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 516b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 517b2b77a04SHong Zhang Mat_FFTW *fftw; 518b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 519b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 520b2b77a04SHong Zhang PetscBool flg; 5216882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 52211902ff2SHong Zhang PetscMPIInt size,rank; 52311902ff2SHong Zhang ptrdiff_t *pdim; 524b2b77a04SHong Zhang 525b2b77a04SHong Zhang PetscFunctionBegin; 526b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 527b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 528b2b77a04SHong Zhang #endif 529b2b77a04SHong Zhang 530b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 53111902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 532*c92418ccSAmlan Barua 53311902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 53411902ff2SHong Zhang pdim[0] = dim[0]; 53511902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 53611902ff2SHong Zhang { 5376882372aSHong Zhang partial_dim*= dim[ctr]; 53811902ff2SHong Zhang pdim[ctr] = dim[ctr]; 5396882372aSHong Zhang } 54011902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 541b2b77a04SHong Zhang if (size == 1) { 542b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 543b2b77a04SHong Zhang n = N; 544b2b77a04SHong Zhang } else { 5456882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 546b2b77a04SHong Zhang switch (ndim){ 547b2b77a04SHong Zhang case 1: 5486882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 5496882372aSHong Zhang n = (PetscInt)local_n0; 5506882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 551*c92418ccSAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D); 552b2b77a04SHong Zhang break; 553b2b77a04SHong Zhang case 2: 554b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 555b2b77a04SHong Zhang /* 556b2b77a04SHong Zhang PetscMPIInt rank; 557b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 558b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 559b2b77a04SHong Zhang */ 560b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 561b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 562b2b77a04SHong Zhang break; 563b2b77a04SHong Zhang case 3: 5646882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 56511902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 5666882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 5676882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 568b2b77a04SHong Zhang break; 569b2b77a04SHong Zhang default: 57011902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 571*c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 57211902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 5736882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 57411902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 5756882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 576b2b77a04SHong Zhang break; 577b2b77a04SHong Zhang } 578b2b77a04SHong Zhang } 579b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 580b2b77a04SHong Zhang 581b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 582b2b77a04SHong Zhang fft->data = (void*)fftw; 583b2b77a04SHong Zhang 584b2b77a04SHong Zhang fft->n = n; 585*c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 586*c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 587*c92418ccSAmlan Barua // (fftw->dim_fftw)=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 588*c92418ccSAmlan Barua for(ctr=0;ctr<ndim;ctr++) 589*c92418ccSAmlan Barua { 590*c92418ccSAmlan Barua (fftw->dim_fftw)[ctr]=dim[ctr]; 591*c92418ccSAmlan Barua } 592*c92418ccSAmlan Barua 593b2b77a04SHong Zhang fftw->p_forward = 0; 594b2b77a04SHong Zhang fftw->p_backward = 0; 595b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 596b2b77a04SHong Zhang 597b2b77a04SHong Zhang if (size == 1){ 598b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 599b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 600b2b77a04SHong Zhang } else { 601b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 602b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 603b2b77a04SHong Zhang } 604b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 605b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 606b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 607b2b77a04SHong Zhang 608b2b77a04SHong Zhang /* get runtime options */ 609b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 610b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 611b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 612b2b77a04SHong Zhang PetscOptionsEnd(); 613b2b77a04SHong Zhang PetscFunctionReturn(0); 614b2b77a04SHong Zhang } 615b2b77a04SHong Zhang EXTERN_C_END 616