1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13*b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18b2b77a04SHong Zhang } Mat_FFTW; 19b2b77a04SHong Zhang 20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27b2b77a04SHong Zhang 28b2b77a04SHong Zhang #undef __FUNCT__ 29b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 30b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31b2b77a04SHong Zhang { 32b2b77a04SHong Zhang PetscErrorCode ierr; 33b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 34b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 36b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 37b2b77a04SHong Zhang 38b2b77a04SHong Zhang PetscFunctionBegin; 39b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 40*b9ae062cSHong Zhang 41b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42b2b77a04SHong Zhang #endif 43b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 46b2b77a04SHong Zhang switch (ndim){ 47b2b77a04SHong Zhang case 1: 48b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49b2b77a04SHong Zhang break; 50b2b77a04SHong Zhang case 2: 51b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52b2b77a04SHong Zhang break; 53b2b77a04SHong Zhang case 3: 54b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55b2b77a04SHong Zhang break; 56b2b77a04SHong Zhang default: 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58b2b77a04SHong Zhang break; 59b2b77a04SHong Zhang } 60b2b77a04SHong Zhang fftw->finarray = x_array; 61b2b77a04SHong Zhang fftw->foutarray = y_array; 62b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 65b2b77a04SHong Zhang } else { /* use existing plan */ 66b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68b2b77a04SHong Zhang } else { 69b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 70b2b77a04SHong Zhang } 71b2b77a04SHong Zhang } 72b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74b2b77a04SHong Zhang PetscFunctionReturn(0); 75b2b77a04SHong Zhang } 76b2b77a04SHong Zhang 77b2b77a04SHong Zhang #undef __FUNCT__ 78b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80b2b77a04SHong Zhang { 81b2b77a04SHong Zhang PetscErrorCode ierr; 82b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 83b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 85b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 86b2b77a04SHong Zhang 87b2b77a04SHong Zhang PetscFunctionBegin; 88b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 89b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90b2b77a04SHong Zhang #endif 91b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 94b2b77a04SHong Zhang switch (ndim){ 95b2b77a04SHong Zhang case 1: 96b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97b2b77a04SHong Zhang break; 98b2b77a04SHong Zhang case 2: 99b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100b2b77a04SHong Zhang break; 101b2b77a04SHong Zhang case 3: 102b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103b2b77a04SHong Zhang break; 104b2b77a04SHong Zhang default: 105b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106b2b77a04SHong Zhang break; 107b2b77a04SHong Zhang } 108b2b77a04SHong Zhang fftw->binarray = x_array; 109b2b77a04SHong Zhang fftw->boutarray = y_array; 110b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111b2b77a04SHong Zhang } else { /* use existing plan */ 112b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114b2b77a04SHong Zhang } else { 115b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116b2b77a04SHong Zhang } 117b2b77a04SHong Zhang } 118b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120b2b77a04SHong Zhang PetscFunctionReturn(0); 121b2b77a04SHong Zhang } 122b2b77a04SHong Zhang 123b2b77a04SHong Zhang #undef __FUNCT__ 124b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 125b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126b2b77a04SHong Zhang { 127b2b77a04SHong Zhang PetscErrorCode ierr; 128b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 129b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 13111902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,ctr; 132b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 13311902ff2SHong Zhang ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 134b2b77a04SHong Zhang 135b2b77a04SHong Zhang PetscFunctionBegin; 136b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 137b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 138b2b77a04SHong Zhang #endif 13911902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 14011902ff2SHong Zhang for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 14111902ff2SHong Zhang 142b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 143b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 144b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 145b2b77a04SHong Zhang switch (ndim){ 146b2b77a04SHong Zhang case 1: 147b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang case 2: 150b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 151b2b77a04SHong Zhang break; 152b2b77a04SHong Zhang case 3: 153b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 154b2b77a04SHong Zhang break; 155b2b77a04SHong Zhang default: 15611902ff2SHong Zhang fftw->p_forward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 15711902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 158b2b77a04SHong Zhang break; 159b2b77a04SHong Zhang } 160b2b77a04SHong Zhang fftw->finarray = x_array; 161b2b77a04SHong Zhang fftw->foutarray = y_array; 162b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 163b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 164b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 165b2b77a04SHong Zhang } else { /* use existing plan */ 166b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 167b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 168b2b77a04SHong Zhang } else { 169b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 170b2b77a04SHong Zhang } 171b2b77a04SHong Zhang } 172b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 173b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 174b2b77a04SHong Zhang PetscFunctionReturn(0); 175b2b77a04SHong Zhang } 176b2b77a04SHong Zhang 177b2b77a04SHong Zhang #undef __FUNCT__ 178b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 179b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 180b2b77a04SHong Zhang { 181b2b77a04SHong Zhang PetscErrorCode ierr; 182b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 183b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 184b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 18511902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,ctr; 186b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 18711902ff2SHong Zhang ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 188b2b77a04SHong Zhang 189b2b77a04SHong Zhang PetscFunctionBegin; 190b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 191b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 192b2b77a04SHong Zhang #endif 19311902ff2SHong Zhang ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); // should pdim be a member of Mat_FFTW? 19411902ff2SHong Zhang for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 19511902ff2SHong Zhang 196b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 197b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 198b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 199b2b77a04SHong Zhang switch (ndim){ 200b2b77a04SHong Zhang case 1: 201b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 202b2b77a04SHong Zhang break; 203b2b77a04SHong Zhang case 2: 204b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 205b2b77a04SHong Zhang break; 206b2b77a04SHong Zhang case 3: 207b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 208b2b77a04SHong Zhang break; 209b2b77a04SHong Zhang default: 21011902ff2SHong Zhang fftw->p_backward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 211b2b77a04SHong Zhang break; 212b2b77a04SHong Zhang } 213b2b77a04SHong Zhang fftw->binarray = x_array; 214b2b77a04SHong Zhang fftw->boutarray = y_array; 215b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 216b2b77a04SHong Zhang } else { /* use existing plan */ 217b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 218b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 219b2b77a04SHong Zhang } else { 220b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 221b2b77a04SHong Zhang } 222b2b77a04SHong Zhang } 223b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 224b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 22511902ff2SHong Zhang ierr = PetscFree(pdim);CHKERRQ(ierr); 226b2b77a04SHong Zhang PetscFunctionReturn(0); 227b2b77a04SHong Zhang } 228b2b77a04SHong Zhang 229b2b77a04SHong Zhang #undef __FUNCT__ 230b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 231b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 232b2b77a04SHong Zhang { 233b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 234b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 235b2b77a04SHong Zhang PetscErrorCode ierr; 236b2b77a04SHong Zhang 237b2b77a04SHong Zhang PetscFunctionBegin; 238b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 239b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 240b2b77a04SHong Zhang #endif 241b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 242b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 243bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 244b2b77a04SHong Zhang PetscFunctionReturn(0); 245b2b77a04SHong Zhang } 246b2b77a04SHong Zhang 247c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 248b2b77a04SHong Zhang #undef __FUNCT__ 249b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 250b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 251b2b77a04SHong Zhang { 252b2b77a04SHong Zhang PetscErrorCode ierr; 253b2b77a04SHong Zhang PetscScalar *array; 254b2b77a04SHong Zhang 255b2b77a04SHong Zhang PetscFunctionBegin; 256b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 257b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 258b2b77a04SHong Zhang #endif 259b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 260b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 261b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 262b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 263b2b77a04SHong Zhang PetscFunctionReturn(0); 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang 266b2b77a04SHong Zhang #undef __FUNCT__ 267b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 268b2b77a04SHong Zhang /* 269b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 270b2b77a04SHong Zhang parallel layout determined by FFTW 271b2b77a04SHong Zhang 272b2b77a04SHong Zhang Collective on Mat 273b2b77a04SHong Zhang 274b2b77a04SHong Zhang Input Parameter: 275b2b77a04SHong Zhang . mat - the matrix 276b2b77a04SHong Zhang 277b2b77a04SHong Zhang Output Parameter: 278b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 279b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 280b2b77a04SHong Zhang 281b2b77a04SHong Zhang Level: advanced 282b2b77a04SHong Zhang 283b2b77a04SHong Zhang .seealso: MatCreateFFTW() 284b2b77a04SHong Zhang */ 285b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 286b2b77a04SHong Zhang { 287b2b77a04SHong Zhang PetscErrorCode ierr; 288b2b77a04SHong Zhang PetscMPIInt size,rank; 289b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 290b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 291b2b77a04SHong Zhang PetscInt N=fft->N; 292b2b77a04SHong Zhang 293b2b77a04SHong Zhang PetscFunctionBegin; 294b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 295b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 296b2b77a04SHong Zhang #endif 297b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 298b2b77a04SHong Zhang PetscValidType(A,1); 299b2b77a04SHong Zhang 300b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 301b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 302b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 303b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 304b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 305b2b77a04SHong Zhang } else { /* mpi case */ 306b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 3076882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 30811902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n,ctr; 309b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 31011902ff2SHong Zhang ptrdiff_t ndim1,*pdim; 31111902ff2SHong Zhang ndim1=(ptrdiff_t) ndim; 31211902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 31311902ff2SHong Zhang 31411902ff2SHong Zhang for(ctr=0;ctr<ndim;ctr++) 31511902ff2SHong Zhang { 31611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 31711902ff2SHong Zhang } 318b2b77a04SHong Zhang 319b2b77a04SHong Zhang switch (ndim){ 320b2b77a04SHong Zhang case 1: 3216882372aSHong Zhang /* Get local size */ 3226882372aSHong Zhang 3236882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 3246882372aSHong Zhang if (fin) { 3256882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3266882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3276882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3286882372aSHong Zhang } 3296882372aSHong Zhang if (fout) { 3306882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3316882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3326882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3336882372aSHong Zhang } 334b2b77a04SHong Zhang break; 335b2b77a04SHong Zhang case 2: 336b2b77a04SHong Zhang /* Get local size */ 337b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 338b2b77a04SHong Zhang if (fin) { 339b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 340b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 341b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 342b2b77a04SHong Zhang } 343b2b77a04SHong Zhang if (fout) { 344b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 345b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 346b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 347b2b77a04SHong Zhang } 348b2b77a04SHong Zhang break; 349b2b77a04SHong Zhang case 3: 3506882372aSHong Zhang /* Get local size */ 3516882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 35211902ff2SHong Zhang // printf("The quantity n is %d",n); 3536882372aSHong Zhang if (fin) { 3546882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3556882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3566882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3576882372aSHong Zhang } 3586882372aSHong Zhang if (fout) { 3596882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3606882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3616882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3626882372aSHong Zhang } 363b2b77a04SHong Zhang break; 364b2b77a04SHong Zhang default: 3656882372aSHong Zhang /* Get local size */ 36611902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim1,pdim,comm,&local_n0,&local_0_start); 36711902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 36811902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 36911902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 37011902ff2SHong Zhang // for(i=0;i<ndim;i++) 37111902ff2SHong Zhang // { 37211902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 37311902ff2SHong Zhang // } 37411902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 37511902ff2SHong Zhang // printf("The quantity n is %d",n); 3766882372aSHong Zhang if (fin) { 3776882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3786882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3796882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3806882372aSHong Zhang } 3816882372aSHong Zhang if (fout) { 3826882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3836882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3846882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3856882372aSHong Zhang } 386b2b77a04SHong Zhang break; 387b2b77a04SHong Zhang } 388b2b77a04SHong Zhang } 389fb7c10e0SHong Zhang if (fin){ 390262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 391fb7c10e0SHong Zhang } 392fb7c10e0SHong Zhang if (fout){ 393262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 394fb7c10e0SHong Zhang } 395b2b77a04SHong Zhang PetscFunctionReturn(0); 396b2b77a04SHong Zhang } 397b2b77a04SHong Zhang 398b2b77a04SHong Zhang EXTERN_C_BEGIN 399b2b77a04SHong Zhang #undef __FUNCT__ 400b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 401b2b77a04SHong Zhang /* 402b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 403b2b77a04SHong Zhang via the external package FFTW 404b2b77a04SHong Zhang 405b2b77a04SHong Zhang Options Database Keys: 406b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 407b2b77a04SHong Zhang 408b2b77a04SHong Zhang Level: intermediate 409b2b77a04SHong Zhang 410b2b77a04SHong Zhang */ 411b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 412b2b77a04SHong Zhang { 413b2b77a04SHong Zhang PetscErrorCode ierr; 414b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 415b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 416b2b77a04SHong Zhang Mat_FFTW *fftw; 417b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 418b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 419b2b77a04SHong Zhang PetscBool flg; 4206882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 42111902ff2SHong Zhang PetscMPIInt size,rank; 42211902ff2SHong Zhang ptrdiff_t *pdim; 423b2b77a04SHong Zhang 424b2b77a04SHong Zhang PetscFunctionBegin; 425b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 426b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 427b2b77a04SHong Zhang #endif 428b2b77a04SHong Zhang 429b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 43011902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 43111902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 43211902ff2SHong Zhang pdim[0] = dim[0]; 43311902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 43411902ff2SHong Zhang { 4356882372aSHong Zhang partial_dim*=dim[ctr]; 43611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 4376882372aSHong Zhang } 43811902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 439b2b77a04SHong Zhang if (size == 1) { 440b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 441b2b77a04SHong Zhang n = N; 442b2b77a04SHong Zhang } else { 4436882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 444b2b77a04SHong Zhang switch (ndim){ 445b2b77a04SHong Zhang case 1: 4466882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4476882372aSHong Zhang n = (PetscInt)local_n0; 4486882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 4496882372aSHong Zhang 450b2b77a04SHong Zhang break; 451b2b77a04SHong Zhang case 2: 452b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 453b2b77a04SHong Zhang /* 454b2b77a04SHong Zhang PetscMPIInt rank; 455b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 456b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 457b2b77a04SHong Zhang */ 458b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 459b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 460b2b77a04SHong Zhang break; 461b2b77a04SHong Zhang case 3: 4626882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 46311902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 4646882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 4656882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 466b2b77a04SHong Zhang break; 467b2b77a04SHong Zhang default: 46811902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 46911902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 47011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 4716882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 47211902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 4736882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 474b2b77a04SHong Zhang break; 475b2b77a04SHong Zhang } 476b2b77a04SHong Zhang } 477b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 478b2b77a04SHong Zhang 479b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 480b2b77a04SHong Zhang fft->data = (void*)fftw; 481b2b77a04SHong Zhang 482b2b77a04SHong Zhang fft->n = n; 483b2b77a04SHong Zhang fftw->p_forward = 0; 484b2b77a04SHong Zhang fftw->p_backward = 0; 485b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 486b2b77a04SHong Zhang 487b2b77a04SHong Zhang if (size == 1){ 488b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 489b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 490b2b77a04SHong Zhang } else { 491b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 492b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 493b2b77a04SHong Zhang } 494b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 495b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 496b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 497b2b77a04SHong Zhang 498b2b77a04SHong Zhang /* get runtime options */ 499b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 500b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 501b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 502b2b77a04SHong Zhang PetscOptionsEnd(); 503b2b77a04SHong Zhang PetscFunctionReturn(0); 504b2b77a04SHong Zhang } 505b2b77a04SHong Zhang EXTERN_C_END 506