1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18b2b77a04SHong Zhang } Mat_FFTW; 19b2b77a04SHong Zhang 20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27b2b77a04SHong Zhang 28b2b77a04SHong Zhang #undef __FUNCT__ 29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31b2b77a04SHong Zhang { 32b2b77a04SHong Zhang PetscErrorCode ierr; 33b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 34b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 36b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 37b2b77a04SHong Zhang 38b2b77a04SHong Zhang PetscFunctionBegin; 39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 40b9ae062cSHong Zhang 41b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42b2b77a04SHong Zhang #endif 43b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 46b2b77a04SHong Zhang switch (ndim){ 47b2b77a04SHong Zhang case 1: 48b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49b2b77a04SHong Zhang break; 50b2b77a04SHong Zhang case 2: 51b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52b2b77a04SHong Zhang break; 53b2b77a04SHong Zhang case 3: 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55b2b77a04SHong Zhang break; 56b2b77a04SHong Zhang default: 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang } 60b2b77a04SHong Zhang fftw->finarray = x_array; 61b2b77a04SHong Zhang fftw->foutarray = y_array; 62b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 65b2b77a04SHong Zhang } else { /* use existing plan */ 66b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68b2b77a04SHong Zhang } else { 69b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 70b2b77a04SHong Zhang } 71b2b77a04SHong Zhang } 72b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74b2b77a04SHong Zhang PetscFunctionReturn(0); 75b2b77a04SHong Zhang } 76b2b77a04SHong Zhang 77b2b77a04SHong Zhang #undef __FUNCT__ 78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80b2b77a04SHong Zhang { 81b2b77a04SHong Zhang PetscErrorCode ierr; 82b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 83b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 85b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 86b2b77a04SHong Zhang 87b2b77a04SHong Zhang PetscFunctionBegin; 88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 89b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90b2b77a04SHong Zhang #endif 91b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 94b2b77a04SHong Zhang switch (ndim){ 95b2b77a04SHong Zhang case 1: 96b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97b2b77a04SHong Zhang break; 98b2b77a04SHong Zhang case 2: 99b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100b2b77a04SHong Zhang break; 101b2b77a04SHong Zhang case 3: 102b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103b2b77a04SHong Zhang break; 104b2b77a04SHong Zhang default: 105b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106b2b77a04SHong Zhang break; 107b2b77a04SHong Zhang } 108b2b77a04SHong Zhang fftw->binarray = x_array; 109b2b77a04SHong Zhang fftw->boutarray = y_array; 110b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111b2b77a04SHong Zhang } else { /* use existing plan */ 112b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114b2b77a04SHong Zhang } else { 115b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116b2b77a04SHong Zhang } 117b2b77a04SHong Zhang } 118b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang PetscFunctionReturn(0); 121b2b77a04SHong Zhang } 122b2b77a04SHong Zhang 123b2b77a04SHong Zhang #undef __FUNCT__ 124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126b2b77a04SHong Zhang { 127b2b77a04SHong Zhang PetscErrorCode ierr; 128b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 129b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 131c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 132b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 133c92418ccSAmlan Barua // PetscInt ctr; 134c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 136c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137c92418ccSAmlan Barua 138c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 139c92418ccSAmlan Barua // { 140c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 141c92418ccSAmlan Barua // } 142b2b77a04SHong Zhang 143b2b77a04SHong Zhang PetscFunctionBegin; 144b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 145b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146b2b77a04SHong Zhang #endif 147c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 14911902ff2SHong Zhang 150b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 153b2b77a04SHong Zhang switch (ndim){ 154b2b77a04SHong Zhang case 1: 155*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 156b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 157*3c3a9c75SAmlan Barua #endif 158b2b77a04SHong Zhang break; 159b2b77a04SHong Zhang case 2: 160*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 161b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162*3c3a9c75SAmlan Barua #else 163*3c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 164*3c3a9c75SAmlan Barua #endif 165b2b77a04SHong Zhang break; 166b2b77a04SHong Zhang case 3: 167*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 168b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 169*3c3a9c75SAmlan Barua #else 170*3c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 171*3c3a9c75SAmlan Barua #endif 172b2b77a04SHong Zhang break; 173b2b77a04SHong Zhang default: 174*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 175c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 176*3c3a9c75SAmlan Barua #else 177*3c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 178*3c3a9c75SAmlan Barua #endif 17911902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 180b2b77a04SHong Zhang break; 181b2b77a04SHong Zhang } 182b2b77a04SHong Zhang fftw->finarray = x_array; 183b2b77a04SHong Zhang fftw->foutarray = y_array; 184b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 185b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 186b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 187b2b77a04SHong Zhang } else { /* use existing plan */ 188b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 189b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 190b2b77a04SHong Zhang } else { 191b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 192b2b77a04SHong Zhang } 193b2b77a04SHong Zhang } 194b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 195b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 196b2b77a04SHong Zhang PetscFunctionReturn(0); 197b2b77a04SHong Zhang } 198b2b77a04SHong Zhang 199b2b77a04SHong Zhang #undef __FUNCT__ 200b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 201b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 202b2b77a04SHong Zhang { 203b2b77a04SHong Zhang PetscErrorCode ierr; 204b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 205b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 206b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 207c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 208b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 209c92418ccSAmlan Barua // PetscInt ctr; 210c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 211c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 212c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 213c92418ccSAmlan Barua 214c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 215c92418ccSAmlan Barua // { 216c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 217c92418ccSAmlan Barua // } 218b2b77a04SHong Zhang 219b2b77a04SHong Zhang PetscFunctionBegin; 220*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 221*3c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 222*3c3a9c75SAmlan Barua //#endif 223c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 224c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 225c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 22611902ff2SHong Zhang 227b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 228b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 229b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 230b2b77a04SHong Zhang switch (ndim){ 231b2b77a04SHong Zhang case 1: 232*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 233b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 234*3c3a9c75SAmlan Barua #endif 235b2b77a04SHong Zhang break; 236b2b77a04SHong Zhang case 2: 237*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 238b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 239*3c3a9c75SAmlan Barua #else 240*3c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 241*3c3a9c75SAmlan Barua #endif 242b2b77a04SHong Zhang break; 243b2b77a04SHong Zhang case 3: 244*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 245b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 246*3c3a9c75SAmlan Barua #else 247*3c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 248*3c3a9c75SAmlan Barua #endif 249b2b77a04SHong Zhang break; 250b2b77a04SHong Zhang default: 251*3c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 252c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 253*3c3a9c75SAmlan Barua #else 254*3c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 255*3c3a9c75SAmlan Barua #endif 256c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 257b2b77a04SHong Zhang break; 258b2b77a04SHong Zhang } 259b2b77a04SHong Zhang fftw->binarray = x_array; 260b2b77a04SHong Zhang fftw->boutarray = y_array; 261b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 262b2b77a04SHong Zhang } else { /* use existing plan */ 263b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 264b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 265b2b77a04SHong Zhang } else { 266b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 267b2b77a04SHong Zhang } 268b2b77a04SHong Zhang } 269b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 270b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 271b2b77a04SHong Zhang PetscFunctionReturn(0); 272b2b77a04SHong Zhang } 273b2b77a04SHong Zhang 274b2b77a04SHong Zhang #undef __FUNCT__ 275b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 276b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 277b2b77a04SHong Zhang { 278b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 279b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 280b2b77a04SHong Zhang PetscErrorCode ierr; 281b2b77a04SHong Zhang 282b2b77a04SHong Zhang PetscFunctionBegin; 283b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 284b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 285b2b77a04SHong Zhang #endif 286b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 287b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 288b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 289bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 290b2b77a04SHong Zhang PetscFunctionReturn(0); 291b2b77a04SHong Zhang } 292b2b77a04SHong Zhang 293c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 294b2b77a04SHong Zhang #undef __FUNCT__ 295b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 296b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 297b2b77a04SHong Zhang { 298b2b77a04SHong Zhang PetscErrorCode ierr; 299b2b77a04SHong Zhang PetscScalar *array; 300b2b77a04SHong Zhang 301b2b77a04SHong Zhang PetscFunctionBegin; 302b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 303b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 304b2b77a04SHong Zhang #endif 305b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 306b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 307b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 309b2b77a04SHong Zhang PetscFunctionReturn(0); 310b2b77a04SHong Zhang } 311b2b77a04SHong Zhang 312b2b77a04SHong Zhang #undef __FUNCT__ 313*3c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 314c92418ccSAmlan Barua /* 315c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 316c92418ccSAmlan Barua parallel layout determined by FFTW-1D 317c92418ccSAmlan Barua 318c92418ccSAmlan Barua */ 319c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 320c92418ccSAmlan Barua { 321c92418ccSAmlan Barua PetscErrorCode ierr; 322c92418ccSAmlan Barua PetscMPIInt size,rank; 323c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 324c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 325c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 326c92418ccSAmlan Barua PetscInt N=fft->N; 327c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 328c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 329c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 330c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 331c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 332c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 333c92418ccSAmlan Barua 334c92418ccSAmlan Barua PetscFunctionBegin; 335c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 336c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 337c92418ccSAmlan Barua #endif 338c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 339c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 340c92418ccSAmlan Barua if (size == 1){ 341c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 342c92418ccSAmlan Barua } 343c92418ccSAmlan Barua else { 344c92418ccSAmlan Barua if (ndim>1){ 345c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 346c92418ccSAmlan Barua else { 347c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 348c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 349c92418ccSAmlan Barua if (fin) { 350c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 351c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 352c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 353c92418ccSAmlan Barua } 354c92418ccSAmlan Barua if (fout) { 355c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 356c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 357c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 358c92418ccSAmlan Barua } 359c92418ccSAmlan Barua if (bin) { 360c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 361c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 362c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 363c92418ccSAmlan Barua } 364c92418ccSAmlan Barua if (bout) { 365c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 366c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 367c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 368c92418ccSAmlan Barua } 369c92418ccSAmlan Barua } 370c92418ccSAmlan Barua if (fin){ 371c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 372c92418ccSAmlan Barua } 373c92418ccSAmlan Barua if (fout){ 374c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 375c92418ccSAmlan Barua } 376c92418ccSAmlan Barua if (bin){ 377c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 378c92418ccSAmlan Barua } 379c92418ccSAmlan Barua if (bout){ 380c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 381c92418ccSAmlan Barua } 382c92418ccSAmlan Barua PetscFunctionReturn(0); 383c92418ccSAmlan Barua } 384c92418ccSAmlan Barua 385c92418ccSAmlan Barua 386c92418ccSAmlan Barua } 387*3c3a9c75SAmlan Barua 388c92418ccSAmlan Barua #undef __FUNCT__ 389b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 390b2b77a04SHong Zhang /* 391b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 392b2b77a04SHong Zhang parallel layout determined by FFTW 393b2b77a04SHong Zhang 394b2b77a04SHong Zhang Collective on Mat 395b2b77a04SHong Zhang 396b2b77a04SHong Zhang Input Parameter: 397b2b77a04SHong Zhang . mat - the matrix 398b2b77a04SHong Zhang 399b2b77a04SHong Zhang Output Parameter: 400b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 401b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 402b2b77a04SHong Zhang 403b2b77a04SHong Zhang Level: advanced 404b2b77a04SHong Zhang 405b2b77a04SHong Zhang .seealso: MatCreateFFTW() 406b2b77a04SHong Zhang */ 407b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 408b2b77a04SHong Zhang { 409b2b77a04SHong Zhang PetscErrorCode ierr; 410b2b77a04SHong Zhang PetscMPIInt size,rank; 411b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 412b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 413c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 414*3c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 415b2b77a04SHong Zhang 416b2b77a04SHong Zhang PetscFunctionBegin; 417*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 418*3c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 419*3c3a9c75SAmlan Barua //#endif 420b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 421b2b77a04SHong Zhang PetscValidType(A,1); 422b2b77a04SHong Zhang 423b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 424b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 425b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 426b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 427b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 428*3c3a9c75SAmlan Barua printf("The code successfully comes at the end of the routine with one processor\n"); 429b2b77a04SHong Zhang } else { /* mpi case */ 430b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4316882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 432c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 433b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 434*3c3a9c75SAmlan Barua double *data_finr, *data_foutr; 435*3c3a9c75SAmlan Barua ptrdiff_t local_1_start; 436*3c3a9c75SAmlan Barua PetscInt N1; 437c92418ccSAmlan Barua // PetscInt ctr; 438c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 439c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 440c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 44111902ff2SHong Zhang 442c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 443c92418ccSAmlan Barua // { 444c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 445c92418ccSAmlan Barua // } 446b2b77a04SHong Zhang 447b2b77a04SHong Zhang switch (ndim){ 448b2b77a04SHong Zhang case 1: 4496882372aSHong Zhang /* Get local size */ 450c92418ccSAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 451c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4526882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4536882372aSHong Zhang if (fin) { 4546882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4556882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4566882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4576882372aSHong Zhang } 4586882372aSHong Zhang if (fout) { 4596882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4606882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4616882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4626882372aSHong Zhang } 463b2b77a04SHong Zhang break; 464b2b77a04SHong Zhang case 2: 465*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 466*3c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 467*3c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 468*3c3a9c75SAmlan Barua if (fin) { 469*3c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 470*3c3a9c75SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 471*3c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 472*3c3a9c75SAmlan Barua printf("The code comes here with vector size %d\n",vsize); 473*3c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 474*3c3a9c75SAmlan Barua } 475*3c3a9c75SAmlan Barua if (fout) { 476*3c3a9c75SAmlan Barua data_foutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 477*3c3a9c75SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_foutr,fout);CHKERRQ(ierr); 478*3c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 479*3c3a9c75SAmlan Barua } 480*3c3a9c75SAmlan Barua printf("The code successfully comes at the end of the routine %d, %d\n",n1,N1); 481*3c3a9c75SAmlan Barua 482*3c3a9c75SAmlan Barua #else 483b2b77a04SHong Zhang /* Get local size */ 484b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 485b2b77a04SHong Zhang if (fin) { 486b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 487b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 488b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 489b2b77a04SHong Zhang } 490b2b77a04SHong Zhang if (fout) { 491b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 493b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 494b2b77a04SHong Zhang } 495*3c3a9c75SAmlan Barua printf("Hope this does not come here"); 496*3c3a9c75SAmlan Barua #endif 497b2b77a04SHong Zhang break; 498b2b77a04SHong Zhang case 3: 4996882372aSHong Zhang /* Get local size */ 500*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 501*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 502*3c3a9c75SAmlan Barua #else 5036882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 50411902ff2SHong Zhang // printf("The quantity n is %d",n); 5056882372aSHong Zhang if (fin) { 5066882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5076882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5086882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5096882372aSHong Zhang } 5106882372aSHong Zhang if (fout) { 5116882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5126882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5136882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5146882372aSHong Zhang } 515*3c3a9c75SAmlan Barua #endif 516b2b77a04SHong Zhang break; 517b2b77a04SHong Zhang default: 5186882372aSHong Zhang /* Get local size */ 519*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 520*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 521*3c3a9c75SAmlan Barua #else 522c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 52311902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 52411902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 52511902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 52611902ff2SHong Zhang // for(i=0;i<ndim;i++) 52711902ff2SHong Zhang // { 52811902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 52911902ff2SHong Zhang // } 53011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 53111902ff2SHong Zhang // printf("The quantity n is %d",n); 5326882372aSHong Zhang if (fin) { 5336882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5346882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5356882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5366882372aSHong Zhang } 5376882372aSHong Zhang if (fout) { 5386882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5396882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5406882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5416882372aSHong Zhang } 542*3c3a9c75SAmlan Barua #endif 543b2b77a04SHong Zhang break; 544b2b77a04SHong Zhang } 545b2b77a04SHong Zhang } 546fb7c10e0SHong Zhang if (fin){ 547262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 548fb7c10e0SHong Zhang } 549fb7c10e0SHong Zhang if (fout){ 550262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 551fb7c10e0SHong Zhang } 552b2b77a04SHong Zhang PetscFunctionReturn(0); 553b2b77a04SHong Zhang } 554b2b77a04SHong Zhang 555*3c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this? 556b2b77a04SHong Zhang #undef __FUNCT__ 557*3c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 558*3c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 559*3c3a9c75SAmlan Barua { 560*3c3a9c75SAmlan Barua PetscErrorCode ierr; 561*3c3a9c75SAmlan Barua PetscFunctionBegin; 562*3c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 563*3c3a9c75SAmlan Barua PetscFunctionReturn(0); 564*3c3a9c75SAmlan Barua } 565*3c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this? 566b2b77a04SHong Zhang /* 567*3c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 568*3c3a9c75SAmlan Barua Input A, x, y 569*3c3a9c75SAmlan Barua A - FFTW matrix 570*3c3a9c75SAmlan Barua x - user d 571b2b77a04SHong Zhang Options Database Keys: 572b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 573b2b77a04SHong Zhang 574b2b77a04SHong Zhang Level: intermediate 575b2b77a04SHong Zhang 576b2b77a04SHong Zhang */ 577*3c3a9c75SAmlan Barua 578*3c3a9c75SAmlan Barua EXTERN_C_BEGIN 579*3c3a9c75SAmlan Barua #undef __FUNCT__ 580*3c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 581*3c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 582*3c3a9c75SAmlan Barua { 583*3c3a9c75SAmlan Barua PetscErrorCode ierr; 584*3c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 585*3c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 586*3c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 587*3c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 588*3c3a9c75SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 589*3c3a9c75SAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1; 590*3c3a9c75SAmlan Barua PetscInt i,j; 591*3c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 592*3c3a9c75SAmlan Barua ptrdiff_t local_n1,local_1_start; 593*3c3a9c75SAmlan Barua VecScatter vecscat; 594*3c3a9c75SAmlan Barua IS list1,list2; 595*3c3a9c75SAmlan Barua 596*3c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 597*3c3a9c75SAmlan Barua 598*3c3a9c75SAmlan Barua switch (ndim){ 599*3c3a9c75SAmlan Barua case 1: 600*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 601*3c3a9c75SAmlan Barua break; 602*3c3a9c75SAmlan Barua case 2: 603*3c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 604*3c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 605*3c3a9c75SAmlan Barua 606*3c3a9c75SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1,&indx1);CHKERRQ(ierr); 607*3c3a9c75SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*local_n0*N1,&indx2);CHKERRQ(ierr); 608*3c3a9c75SAmlan Barua 609*3c3a9c75SAmlan Barua if (dim[1]%2==0) 610*3c3a9c75SAmlan Barua NM = dim[1]+2; 611*3c3a9c75SAmlan Barua else 612*3c3a9c75SAmlan Barua NM = dim[1]+1; 613*3c3a9c75SAmlan Barua 614*3c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 615*3c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 616*3c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 617*3c3a9c75SAmlan Barua tempindx1 = i*NM + j; 618*3c3a9c75SAmlan Barua indx1[tempindx]=local_0_start*N1+tempindx; 619*3c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 620*3c3a9c75SAmlan Barua // printf("index3 %d from proc %d is \n",indx3[tempindx],rank); 621*3c3a9c75SAmlan Barua // printf("index4 %d from proc %d is \n",indx4[tempindx],rank); 622*3c3a9c75SAmlan Barua } 623*3c3a9c75SAmlan Barua } 624*3c3a9c75SAmlan Barua 625*3c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 626*3c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 627*3c3a9c75SAmlan Barua 628*3c3a9c75SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat); 629*3c3a9c75SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD); 630*3c3a9c75SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD); 631*3c3a9c75SAmlan Barua ierr = VecScatterDestroy(&vecscat); 632*3c3a9c75SAmlan Barua break; 633*3c3a9c75SAmlan Barua 634*3c3a9c75SAmlan Barua case 3: 635*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 636*3c3a9c75SAmlan Barua break; 637*3c3a9c75SAmlan Barua 638*3c3a9c75SAmlan Barua default: 639*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 640*3c3a9c75SAmlan Barua break; 641*3c3a9c75SAmlan Barua } 642*3c3a9c75SAmlan Barua 643*3c3a9c75SAmlan Barua return 0; 644*3c3a9c75SAmlan Barua } 645*3c3a9c75SAmlan Barua EXTERN_C_END 646*3c3a9c75SAmlan Barua 647*3c3a9c75SAmlan Barua /* 648*3c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this? 649*3c3a9c75SAmlan Barua #undef __FUNCT__ 650*3c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 651*3c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 652*3c3a9c75SAmlan Barua { 653*3c3a9c75SAmlan Barua PetscErrorCode ierr; 654*3c3a9c75SAmlan Barua PetscFunctionBegin; 655*3c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 656*3c3a9c75SAmlan Barua PetscFunctionReturn(0); 657*3c3a9c75SAmlan Barua } 658*3c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this? 659*3c3a9c75SAmlan Barua */ 660*3c3a9c75SAmlan Barua 661*3c3a9c75SAmlan Barua EXTERN_C_BEGIN 662*3c3a9c75SAmlan Barua #undef __FUNCT__ 663*3c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 664*3c3a9c75SAmlan Barua /* 665*3c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 666*3c3a9c75SAmlan Barua via the external package FFTW 667*3c3a9c75SAmlan Barua Options Database Keys: 668*3c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 669*3c3a9c75SAmlan Barua 670*3c3a9c75SAmlan Barua Level: intermediate 671*3c3a9c75SAmlan Barua 672*3c3a9c75SAmlan Barua */ 673*3c3a9c75SAmlan Barua 674b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 675b2b77a04SHong Zhang { 676b2b77a04SHong Zhang PetscErrorCode ierr; 677b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 678b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 679b2b77a04SHong Zhang Mat_FFTW *fftw; 680b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 681b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 682b2b77a04SHong Zhang PetscBool flg; 6836882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 68411902ff2SHong Zhang PetscMPIInt size,rank; 68511902ff2SHong Zhang ptrdiff_t *pdim; 686b2b77a04SHong Zhang 687b2b77a04SHong Zhang PetscFunctionBegin; 688*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 689*3c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 690*3c3a9c75SAmlan Barua //#endif 691b2b77a04SHong Zhang 692b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 69311902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 694c92418ccSAmlan Barua 69511902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 69611902ff2SHong Zhang pdim[0] = dim[0]; 69711902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 69811902ff2SHong Zhang { 6996882372aSHong Zhang partial_dim *= dim[ctr]; 70011902ff2SHong Zhang pdim[ctr] = dim[ctr]; 7016882372aSHong Zhang } 702*3c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 703*3c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 704*3c3a9c75SAmlan Barua //#endif 705*3c3a9c75SAmlan Barua 70611902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 707b2b77a04SHong Zhang if (size == 1) { 708b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 709b2b77a04SHong Zhang n = N; 710b2b77a04SHong Zhang } else { 7116882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 712b2b77a04SHong Zhang switch (ndim){ 713b2b77a04SHong Zhang case 1: 714*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 715*3c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 716*3c3a9c75SAmlan Barua #endif 7176882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 7186882372aSHong Zhang n = (PetscInt)local_n0; 7196882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 720*3c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 721b2b77a04SHong Zhang break; 722b2b77a04SHong Zhang case 2: 723b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 724b2b77a04SHong Zhang /* 725b2b77a04SHong Zhang PetscMPIInt rank; 726b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 727b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 728b2b77a04SHong Zhang */ 729b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 730b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 731b2b77a04SHong Zhang break; 732b2b77a04SHong Zhang case 3: 73311902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 7346882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 7356882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 736b2b77a04SHong Zhang break; 737b2b77a04SHong Zhang default: 73811902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 739c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 74011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 7416882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 74211902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 7436882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 744b2b77a04SHong Zhang break; 745b2b77a04SHong Zhang } 746b2b77a04SHong Zhang } 747b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 748b2b77a04SHong Zhang 749b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 750b2b77a04SHong Zhang fft->data = (void*)fftw; 751b2b77a04SHong Zhang 752b2b77a04SHong Zhang fft->n = n; 753c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 754c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 755b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 756c92418ccSAmlan Barua 757b2b77a04SHong Zhang fftw->p_forward = 0; 758b2b77a04SHong Zhang fftw->p_backward = 0; 759b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 760b2b77a04SHong Zhang 761b2b77a04SHong Zhang if (size == 1){ 762b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 763b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 764b2b77a04SHong Zhang } else { 765b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 766b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 767b2b77a04SHong Zhang } 768b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 769b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 770b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 771*3c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 772*3c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 773*3c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 774*3c3a9c75SAmlan Barua #endif 775b2b77a04SHong Zhang 776b2b77a04SHong Zhang /* get runtime options */ 777b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 778b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 779b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 780b2b77a04SHong Zhang PetscOptionsEnd(); 781b2b77a04SHong Zhang PetscFunctionReturn(0); 782b2b77a04SHong Zhang } 783b2b77a04SHong Zhang EXTERN_C_END 784*3c3a9c75SAmlan Barua 785*3c3a9c75SAmlan Barua 786*3c3a9c75SAmlan Barua 787*3c3a9c75SAmlan Barua 788