1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18b2b77a04SHong Zhang } Mat_FFTW; 19b2b77a04SHong Zhang 20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27b2b77a04SHong Zhang 28b2b77a04SHong Zhang #undef __FUNCT__ 29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31b2b77a04SHong Zhang { 32b2b77a04SHong Zhang PetscErrorCode ierr; 33b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 34b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 36b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 37b2b77a04SHong Zhang 38b2b77a04SHong Zhang PetscFunctionBegin; 39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 40b9ae062cSHong Zhang 41b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42b2b77a04SHong Zhang #endif 43b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 46b2b77a04SHong Zhang switch (ndim){ 47b2b77a04SHong Zhang case 1: 48b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49b2b77a04SHong Zhang break; 50b2b77a04SHong Zhang case 2: 51b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52b2b77a04SHong Zhang break; 53b2b77a04SHong Zhang case 3: 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55b2b77a04SHong Zhang break; 56b2b77a04SHong Zhang default: 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang } 60b2b77a04SHong Zhang fftw->finarray = x_array; 61b2b77a04SHong Zhang fftw->foutarray = y_array; 62b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 65b2b77a04SHong Zhang } else { /* use existing plan */ 66b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68b2b77a04SHong Zhang } else { 69b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 70b2b77a04SHong Zhang } 71b2b77a04SHong Zhang } 72b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74b2b77a04SHong Zhang PetscFunctionReturn(0); 75b2b77a04SHong Zhang } 76b2b77a04SHong Zhang 77b2b77a04SHong Zhang #undef __FUNCT__ 78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80b2b77a04SHong Zhang { 81b2b77a04SHong Zhang PetscErrorCode ierr; 82b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 83b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 85b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 86b2b77a04SHong Zhang 87b2b77a04SHong Zhang PetscFunctionBegin; 88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 89b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90b2b77a04SHong Zhang #endif 91b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 94b2b77a04SHong Zhang switch (ndim){ 95b2b77a04SHong Zhang case 1: 96b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97b2b77a04SHong Zhang break; 98b2b77a04SHong Zhang case 2: 99b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100b2b77a04SHong Zhang break; 101b2b77a04SHong Zhang case 3: 102b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103b2b77a04SHong Zhang break; 104b2b77a04SHong Zhang default: 105b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106b2b77a04SHong Zhang break; 107b2b77a04SHong Zhang } 108b2b77a04SHong Zhang fftw->binarray = x_array; 109b2b77a04SHong Zhang fftw->boutarray = y_array; 110b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111b2b77a04SHong Zhang } else { /* use existing plan */ 112b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114b2b77a04SHong Zhang } else { 115b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116b2b77a04SHong Zhang } 117b2b77a04SHong Zhang } 118b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang PetscFunctionReturn(0); 121b2b77a04SHong Zhang } 122b2b77a04SHong Zhang 123b2b77a04SHong Zhang #undef __FUNCT__ 124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126b2b77a04SHong Zhang { 127b2b77a04SHong Zhang PetscErrorCode ierr; 128b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 129b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 131c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 132b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 133c92418ccSAmlan Barua // PetscInt ctr; 134c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 136c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137c92418ccSAmlan Barua 138c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 139c92418ccSAmlan Barua // { 140c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 141c92418ccSAmlan Barua // } 142b2b77a04SHong Zhang 143b2b77a04SHong Zhang PetscFunctionBegin; 144b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 145b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146b2b77a04SHong Zhang #endif 147c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 14911902ff2SHong Zhang 150b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 153b2b77a04SHong Zhang switch (ndim){ 154b2b77a04SHong Zhang case 1: 155b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 156b2b77a04SHong Zhang break; 157b2b77a04SHong Zhang case 2: 158b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 159b2b77a04SHong Zhang break; 160b2b77a04SHong Zhang case 3: 161b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162b2b77a04SHong Zhang break; 163b2b77a04SHong Zhang default: 164c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 16511902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 166b2b77a04SHong Zhang break; 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang fftw->finarray = x_array; 169b2b77a04SHong Zhang fftw->foutarray = y_array; 170b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 171b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 172b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 173b2b77a04SHong Zhang } else { /* use existing plan */ 174b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 175b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 176b2b77a04SHong Zhang } else { 177b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 178b2b77a04SHong Zhang } 179b2b77a04SHong Zhang } 180b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 181b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 182b2b77a04SHong Zhang PetscFunctionReturn(0); 183b2b77a04SHong Zhang } 184b2b77a04SHong Zhang 185b2b77a04SHong Zhang #undef __FUNCT__ 186b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 187b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 188b2b77a04SHong Zhang { 189b2b77a04SHong Zhang PetscErrorCode ierr; 190b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 191b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 193c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 194b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 195c92418ccSAmlan Barua // PetscInt ctr; 196c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 197c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 198c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 199c92418ccSAmlan Barua 200c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 201c92418ccSAmlan Barua // { 202c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 203c92418ccSAmlan Barua // } 204b2b77a04SHong Zhang 205b2b77a04SHong Zhang PetscFunctionBegin; 206b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 207b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 208b2b77a04SHong Zhang #endif 209c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 210c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 211c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 21211902ff2SHong Zhang 213b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 214b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 215b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 216b2b77a04SHong Zhang switch (ndim){ 217b2b77a04SHong Zhang case 1: 218b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 219b2b77a04SHong Zhang break; 220b2b77a04SHong Zhang case 2: 221b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 222b2b77a04SHong Zhang break; 223b2b77a04SHong Zhang case 3: 224b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 225b2b77a04SHong Zhang break; 226b2b77a04SHong Zhang default: 227c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 228c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 229b2b77a04SHong Zhang break; 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang fftw->binarray = x_array; 232b2b77a04SHong Zhang fftw->boutarray = y_array; 233b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 234b2b77a04SHong Zhang } else { /* use existing plan */ 235b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 236b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 237b2b77a04SHong Zhang } else { 238b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 239b2b77a04SHong Zhang } 240b2b77a04SHong Zhang } 241b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 242b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 243b2b77a04SHong Zhang PetscFunctionReturn(0); 244b2b77a04SHong Zhang } 245b2b77a04SHong Zhang 246b2b77a04SHong Zhang #undef __FUNCT__ 247b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 248b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 249b2b77a04SHong Zhang { 250b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 251b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 252b2b77a04SHong Zhang PetscErrorCode ierr; 253b2b77a04SHong Zhang 254b2b77a04SHong Zhang PetscFunctionBegin; 255b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 256b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 257b2b77a04SHong Zhang #endif 258b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 259b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 260*b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 261bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 262b2b77a04SHong Zhang PetscFunctionReturn(0); 263b2b77a04SHong Zhang } 264b2b77a04SHong Zhang 265c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 266b2b77a04SHong Zhang #undef __FUNCT__ 267b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 268b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 269b2b77a04SHong Zhang { 270b2b77a04SHong Zhang PetscErrorCode ierr; 271b2b77a04SHong Zhang PetscScalar *array; 272b2b77a04SHong Zhang 273b2b77a04SHong Zhang PetscFunctionBegin; 274b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 275b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 276b2b77a04SHong Zhang #endif 277b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 278b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 279b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 280b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 281b2b77a04SHong Zhang PetscFunctionReturn(0); 282b2b77a04SHong Zhang } 283b2b77a04SHong Zhang 284b2b77a04SHong Zhang #undef __FUNCT__ 285c92418ccSAmlan Barua #define __FUNCT__ "MatGetVecs_FFTW1D" 286c92418ccSAmlan Barua /* 287c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 288c92418ccSAmlan Barua parallel layout determined by FFTW-1D 289c92418ccSAmlan Barua 290c92418ccSAmlan Barua */ 291c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 292c92418ccSAmlan Barua { 293c92418ccSAmlan Barua PetscErrorCode ierr; 294c92418ccSAmlan Barua PetscMPIInt size,rank; 295c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 296c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 297c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 298c92418ccSAmlan Barua PetscInt N=fft->N; 299c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 300c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 301c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 302c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 303c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 304c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 305c92418ccSAmlan Barua 306c92418ccSAmlan Barua PetscFunctionBegin; 307c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 308c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 309c92418ccSAmlan Barua #endif 310c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 311c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 312c92418ccSAmlan Barua if (size == 1){ 313c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 314c92418ccSAmlan Barua } 315c92418ccSAmlan Barua else { 316c92418ccSAmlan Barua if (ndim>1){ 317c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 318c92418ccSAmlan Barua else { 319c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 320c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 321c92418ccSAmlan Barua if (fin) { 322c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 323c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 324c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 325c92418ccSAmlan Barua } 326c92418ccSAmlan Barua if (fout) { 327c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 328c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 329c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 330c92418ccSAmlan Barua } 331c92418ccSAmlan Barua if (bin) { 332c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 333c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 334c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 335c92418ccSAmlan Barua } 336c92418ccSAmlan Barua if (bout) { 337c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 338c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 339c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 340c92418ccSAmlan Barua } 341c92418ccSAmlan Barua } 342c92418ccSAmlan Barua if (fin){ 343c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 344c92418ccSAmlan Barua } 345c92418ccSAmlan Barua if (fout){ 346c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 347c92418ccSAmlan Barua } 348c92418ccSAmlan Barua if (bin){ 349c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 350c92418ccSAmlan Barua } 351c92418ccSAmlan Barua if (bout){ 352c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 353c92418ccSAmlan Barua } 354c92418ccSAmlan Barua PetscFunctionReturn(0); 355c92418ccSAmlan Barua } 356c92418ccSAmlan Barua 357c92418ccSAmlan Barua 358c92418ccSAmlan Barua 359c92418ccSAmlan Barua 360c92418ccSAmlan Barua 361c92418ccSAmlan Barua 362c92418ccSAmlan Barua 363c92418ccSAmlan Barua } 364c92418ccSAmlan Barua #undef __FUNCT__ 365b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 366b2b77a04SHong Zhang /* 367b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 368b2b77a04SHong Zhang parallel layout determined by FFTW 369b2b77a04SHong Zhang 370b2b77a04SHong Zhang Collective on Mat 371b2b77a04SHong Zhang 372b2b77a04SHong Zhang Input Parameter: 373b2b77a04SHong Zhang . mat - the matrix 374b2b77a04SHong Zhang 375b2b77a04SHong Zhang Output Parameter: 376b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 377b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 378b2b77a04SHong Zhang 379b2b77a04SHong Zhang Level: advanced 380b2b77a04SHong Zhang 381b2b77a04SHong Zhang .seealso: MatCreateFFTW() 382b2b77a04SHong Zhang */ 383b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 384b2b77a04SHong Zhang { 385b2b77a04SHong Zhang PetscErrorCode ierr; 386b2b77a04SHong Zhang PetscMPIInt size,rank; 387b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 388b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 389c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 390b2b77a04SHong Zhang PetscInt N=fft->N; 391b2b77a04SHong Zhang 392b2b77a04SHong Zhang PetscFunctionBegin; 393b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 394b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 395b2b77a04SHong Zhang #endif 396b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 397b2b77a04SHong Zhang PetscValidType(A,1); 398b2b77a04SHong Zhang 399b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 400b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 401b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 402b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404b2b77a04SHong Zhang } else { /* mpi case */ 405b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4066882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 407c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 408b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 409c92418ccSAmlan Barua // PetscInt ctr; 410c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 411c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 412c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 41311902ff2SHong Zhang 414c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 415c92418ccSAmlan Barua // { 416c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 417c92418ccSAmlan Barua // } 418b2b77a04SHong Zhang 419b2b77a04SHong Zhang switch (ndim){ 420b2b77a04SHong Zhang case 1: 4216882372aSHong Zhang /* Get local size */ 422c92418ccSAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 423c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4246882372aSHong Zhang 4256882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4266882372aSHong Zhang if (fin) { 4276882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4286882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4296882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4306882372aSHong Zhang } 4316882372aSHong Zhang if (fout) { 4326882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4336882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4346882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4356882372aSHong Zhang } 436b2b77a04SHong Zhang break; 437b2b77a04SHong Zhang case 2: 438b2b77a04SHong Zhang /* Get local size */ 439b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 440b2b77a04SHong Zhang if (fin) { 441b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 442b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 443b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 444b2b77a04SHong Zhang } 445b2b77a04SHong Zhang if (fout) { 446b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 447b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 448b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 449b2b77a04SHong Zhang } 450b2b77a04SHong Zhang break; 451b2b77a04SHong Zhang case 3: 4526882372aSHong Zhang /* Get local size */ 4536882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 45411902ff2SHong Zhang // printf("The quantity n is %d",n); 4556882372aSHong Zhang if (fin) { 4566882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4576882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4586882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4596882372aSHong Zhang } 4606882372aSHong Zhang if (fout) { 4616882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4626882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4636882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4646882372aSHong Zhang } 465b2b77a04SHong Zhang break; 466b2b77a04SHong Zhang default: 4676882372aSHong Zhang /* Get local size */ 468c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 46911902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 47011902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 47111902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 47211902ff2SHong Zhang // for(i=0;i<ndim;i++) 47311902ff2SHong Zhang // { 47411902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 47511902ff2SHong Zhang // } 47611902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 47711902ff2SHong Zhang // printf("The quantity n is %d",n); 4786882372aSHong Zhang if (fin) { 4796882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4806882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4816882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4826882372aSHong Zhang } 4836882372aSHong Zhang if (fout) { 4846882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4856882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4866882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4876882372aSHong Zhang } 488b2b77a04SHong Zhang break; 489b2b77a04SHong Zhang } 490b2b77a04SHong Zhang } 491fb7c10e0SHong Zhang if (fin){ 492262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 493fb7c10e0SHong Zhang } 494fb7c10e0SHong Zhang if (fout){ 495262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 496fb7c10e0SHong Zhang } 497b2b77a04SHong Zhang PetscFunctionReturn(0); 498b2b77a04SHong Zhang } 499b2b77a04SHong Zhang 500b2b77a04SHong Zhang EXTERN_C_BEGIN 501b2b77a04SHong Zhang #undef __FUNCT__ 502b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 503b2b77a04SHong Zhang /* 504b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 505b2b77a04SHong Zhang via the external package FFTW 506b2b77a04SHong Zhang 507b2b77a04SHong Zhang Options Database Keys: 508b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 509b2b77a04SHong Zhang 510b2b77a04SHong Zhang Level: intermediate 511b2b77a04SHong Zhang 512b2b77a04SHong Zhang */ 513b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 514b2b77a04SHong Zhang { 515b2b77a04SHong Zhang PetscErrorCode ierr; 516b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 517b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 518b2b77a04SHong Zhang Mat_FFTW *fftw; 519b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 520b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 521b2b77a04SHong Zhang PetscBool flg; 5226882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 52311902ff2SHong Zhang PetscMPIInt size,rank; 52411902ff2SHong Zhang ptrdiff_t *pdim; 525b2b77a04SHong Zhang 526b2b77a04SHong Zhang PetscFunctionBegin; 527b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 528b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 529b2b77a04SHong Zhang #endif 530b2b77a04SHong Zhang 531b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 53211902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 533c92418ccSAmlan Barua 53411902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 53511902ff2SHong Zhang pdim[0] = dim[0]; 53611902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 53711902ff2SHong Zhang { 5386882372aSHong Zhang partial_dim*= dim[ctr]; 53911902ff2SHong Zhang pdim[ctr] = dim[ctr]; 5406882372aSHong Zhang } 54111902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 542b2b77a04SHong Zhang if (size == 1) { 543b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 544b2b77a04SHong Zhang n = N; 545b2b77a04SHong Zhang } else { 5466882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 547b2b77a04SHong Zhang switch (ndim){ 548b2b77a04SHong Zhang case 1: 5496882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 5506882372aSHong Zhang n = (PetscInt)local_n0; 5516882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 552c92418ccSAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs_FFTW1D_C","MatGetVecs_FFTW1D",MatGetVecs_FFTW1D); 553b2b77a04SHong Zhang break; 554b2b77a04SHong Zhang case 2: 555b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 556b2b77a04SHong Zhang /* 557b2b77a04SHong Zhang PetscMPIInt rank; 558b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 559b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 560b2b77a04SHong Zhang */ 561b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 562b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 563b2b77a04SHong Zhang break; 564b2b77a04SHong Zhang case 3: 5656882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 56611902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 5676882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 5686882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 569b2b77a04SHong Zhang break; 570b2b77a04SHong Zhang default: 57111902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 572c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 57311902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 5746882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 57511902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 5766882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 577b2b77a04SHong Zhang break; 578b2b77a04SHong Zhang } 579b2b77a04SHong Zhang } 580b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 581b2b77a04SHong Zhang 582b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 583b2b77a04SHong Zhang fft->data = (void*)fftw; 584b2b77a04SHong Zhang 585b2b77a04SHong Zhang fft->n = n; 586c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 587c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 588*b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 589c92418ccSAmlan Barua 590b2b77a04SHong Zhang fftw->p_forward = 0; 591b2b77a04SHong Zhang fftw->p_backward = 0; 592b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 593b2b77a04SHong Zhang 594b2b77a04SHong Zhang if (size == 1){ 595b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 596b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 597b2b77a04SHong Zhang } else { 598b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 599b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 600b2b77a04SHong Zhang } 601b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 602b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 603b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 604b2b77a04SHong Zhang 605b2b77a04SHong Zhang /* get runtime options */ 606b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 607b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 608b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 609b2b77a04SHong Zhang PetscOptionsEnd(); 610b2b77a04SHong Zhang PetscFunctionReturn(0); 611b2b77a04SHong Zhang } 612b2b77a04SHong Zhang EXTERN_C_END 613