1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 14b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 15b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 16b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 17b2b77a04SHong Zhang } Mat_FFTW; 18b2b77a04SHong Zhang 19b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 20b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 24b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 26b2b77a04SHong Zhang 27b2b77a04SHong Zhang #undef __FUNCT__ 28b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 29b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 30b2b77a04SHong Zhang { 31b2b77a04SHong Zhang PetscErrorCode ierr; 32b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 33b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 34b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 35b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 36b2b77a04SHong Zhang 37b2b77a04SHong Zhang PetscFunctionBegin; 38b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 39b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 40b2b77a04SHong Zhang #endif 41b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 44b2b77a04SHong Zhang switch (ndim){ 45b2b77a04SHong Zhang case 1: 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47b2b77a04SHong Zhang break; 48b2b77a04SHong Zhang case 2: 49b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 3: 52b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 53b2b77a04SHong Zhang break; 54b2b77a04SHong Zhang default: 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56b2b77a04SHong Zhang break; 57b2b77a04SHong Zhang } 58b2b77a04SHong Zhang fftw->finarray = x_array; 59b2b77a04SHong Zhang fftw->foutarray = y_array; 60b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 61b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 62b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 63b2b77a04SHong Zhang } else { /* use existing plan */ 64b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 65b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 66b2b77a04SHong Zhang } else { 67b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 68b2b77a04SHong Zhang } 69b2b77a04SHong Zhang } 70b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 71b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 72b2b77a04SHong Zhang PetscFunctionReturn(0); 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang 75b2b77a04SHong Zhang #undef __FUNCT__ 76b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 77b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 78b2b77a04SHong Zhang { 79b2b77a04SHong Zhang PetscErrorCode ierr; 80b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 81b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 82b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 83b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 84b2b77a04SHong Zhang 85b2b77a04SHong Zhang PetscFunctionBegin; 86b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 87b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 88b2b77a04SHong Zhang #endif 89b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 90b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 91b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 92b2b77a04SHong Zhang switch (ndim){ 93b2b77a04SHong Zhang case 1: 94b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 95b2b77a04SHong Zhang break; 96b2b77a04SHong Zhang case 2: 97b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 98b2b77a04SHong Zhang break; 99b2b77a04SHong Zhang case 3: 100b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 101b2b77a04SHong Zhang break; 102b2b77a04SHong Zhang default: 103b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 104b2b77a04SHong Zhang break; 105b2b77a04SHong Zhang } 106b2b77a04SHong Zhang fftw->binarray = x_array; 107b2b77a04SHong Zhang fftw->boutarray = y_array; 108b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 109b2b77a04SHong Zhang } else { /* use existing plan */ 110b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 111b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 112b2b77a04SHong Zhang } else { 113b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 114b2b77a04SHong Zhang } 115b2b77a04SHong Zhang } 116b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 117b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang PetscFunctionReturn(0); 119b2b77a04SHong Zhang } 120b2b77a04SHong Zhang 121b2b77a04SHong Zhang #undef __FUNCT__ 122b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 123b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 124b2b77a04SHong Zhang { 125b2b77a04SHong Zhang PetscErrorCode ierr; 126b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 127b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 128b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 129b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 130b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 131b2b77a04SHong Zhang 132b2b77a04SHong Zhang PetscFunctionBegin; 133b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 135b2b77a04SHong Zhang #endif 136b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 137b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 138b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 139b2b77a04SHong Zhang switch (ndim){ 140b2b77a04SHong Zhang case 1: 141b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 142b2b77a04SHong Zhang break; 143b2b77a04SHong Zhang case 2: 144b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang case 3: 147b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang default: 150b2b77a04SHong Zhang /* 151b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 152b2b77a04SHong Zhang */ 153b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet"); 154b2b77a04SHong Zhang break; 155b2b77a04SHong Zhang } 156b2b77a04SHong Zhang fftw->finarray = x_array; 157b2b77a04SHong Zhang fftw->foutarray = y_array; 158b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 159b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 160b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 161b2b77a04SHong Zhang } else { /* use existing plan */ 162b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 163b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 164b2b77a04SHong Zhang } else { 165b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 166b2b77a04SHong Zhang } 167b2b77a04SHong Zhang } 168b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 169b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 170b2b77a04SHong Zhang PetscFunctionReturn(0); 171b2b77a04SHong Zhang } 172b2b77a04SHong Zhang 173b2b77a04SHong Zhang #undef __FUNCT__ 174b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 175b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 176b2b77a04SHong Zhang { 177b2b77a04SHong Zhang PetscErrorCode ierr; 178b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 179b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 181b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 182b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 183b2b77a04SHong Zhang 184b2b77a04SHong Zhang PetscFunctionBegin; 185b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 186b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 187b2b77a04SHong Zhang #endif 188b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 189b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 190b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 191b2b77a04SHong Zhang switch (ndim){ 192b2b77a04SHong Zhang case 1: 193b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 194b2b77a04SHong Zhang break; 195b2b77a04SHong Zhang case 2: 196b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 197b2b77a04SHong Zhang break; 198b2b77a04SHong Zhang case 3: 199b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang default: 202b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not supported yet"); 203b2b77a04SHong Zhang /* fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); */ 204b2b77a04SHong Zhang break; 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang fftw->binarray = x_array; 207b2b77a04SHong Zhang fftw->boutarray = y_array; 208b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 209b2b77a04SHong Zhang } else { /* use existing plan */ 210b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 211b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 212b2b77a04SHong Zhang } else { 213b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 214b2b77a04SHong Zhang } 215b2b77a04SHong Zhang } 216b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 217b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 218b2b77a04SHong Zhang PetscFunctionReturn(0); 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang 221b2b77a04SHong Zhang #undef __FUNCT__ 222b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 223b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 224b2b77a04SHong Zhang { 225b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 226b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 227b2b77a04SHong Zhang PetscErrorCode ierr; 228b2b77a04SHong Zhang 229b2b77a04SHong Zhang PetscFunctionBegin; 230b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 231b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 232b2b77a04SHong Zhang #endif 233b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 234b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 235bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 236b2b77a04SHong Zhang PetscFunctionReturn(0); 237b2b77a04SHong Zhang } 238b2b77a04SHong Zhang 239c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 240b2b77a04SHong Zhang #undef __FUNCT__ 241b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 242b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 243b2b77a04SHong Zhang { 244b2b77a04SHong Zhang PetscErrorCode ierr; 245b2b77a04SHong Zhang PetscScalar *array; 246b2b77a04SHong Zhang 247b2b77a04SHong Zhang PetscFunctionBegin; 248b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 249b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 250b2b77a04SHong Zhang #endif 251b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 252b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 253b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 254b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 255b2b77a04SHong Zhang PetscFunctionReturn(0); 256b2b77a04SHong Zhang } 257b2b77a04SHong Zhang 258b2b77a04SHong Zhang #undef __FUNCT__ 259b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 260b2b77a04SHong Zhang /* 261b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 262b2b77a04SHong Zhang parallel layout determined by FFTW 263b2b77a04SHong Zhang 264b2b77a04SHong Zhang Collective on Mat 265b2b77a04SHong Zhang 266b2b77a04SHong Zhang Input Parameter: 267b2b77a04SHong Zhang . mat - the matrix 268b2b77a04SHong Zhang 269b2b77a04SHong Zhang Output Parameter: 270b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 271b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 272b2b77a04SHong Zhang 273b2b77a04SHong Zhang Level: advanced 274b2b77a04SHong Zhang 275b2b77a04SHong Zhang .seealso: MatCreateFFTW() 276b2b77a04SHong Zhang */ 277b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 278b2b77a04SHong Zhang { 279b2b77a04SHong Zhang PetscErrorCode ierr; 280b2b77a04SHong Zhang PetscMPIInt size,rank; 281b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 282b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 283b2b77a04SHong Zhang PetscInt N=fft->N; 284b2b77a04SHong Zhang 285b2b77a04SHong Zhang PetscFunctionBegin; 286b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 287b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 288b2b77a04SHong Zhang #endif 289b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 290b2b77a04SHong Zhang PetscValidType(A,1); 291b2b77a04SHong Zhang 292b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 293b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 294b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 295b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 296b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 297b2b77a04SHong Zhang } else { /* mpi case */ 298b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 299b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 300b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 301b2b77a04SHong Zhang 302b2b77a04SHong Zhang switch (ndim){ 303b2b77a04SHong Zhang case 1: 304b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 305b2b77a04SHong Zhang break; 306b2b77a04SHong Zhang case 2: 307b2b77a04SHong Zhang /* Get local size */ 308b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 309b2b77a04SHong Zhang if (fin) { 310b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 311b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 312b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 313b2b77a04SHong Zhang } 314b2b77a04SHong Zhang if (fout) { 315b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 316b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 317b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 318b2b77a04SHong Zhang } 319b2b77a04SHong Zhang break; 320b2b77a04SHong Zhang case 3: 321b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 322b2b77a04SHong Zhang break; 323b2b77a04SHong Zhang default: 324b2b77a04SHong Zhang SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Not supported yet"); 325b2b77a04SHong Zhang break; 326b2b77a04SHong Zhang } 327b2b77a04SHong Zhang } 328*fb7c10e0SHong Zhang if (fin){ 329262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 330*fb7c10e0SHong Zhang } 331*fb7c10e0SHong Zhang if (fout){ 332262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 333*fb7c10e0SHong Zhang } 334b2b77a04SHong Zhang PetscFunctionReturn(0); 335b2b77a04SHong Zhang } 336b2b77a04SHong Zhang 337b2b77a04SHong Zhang EXTERN_C_BEGIN 338b2b77a04SHong Zhang #undef __FUNCT__ 339b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 340b2b77a04SHong Zhang /* 341b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 342b2b77a04SHong Zhang via the external package FFTW 343b2b77a04SHong Zhang 344b2b77a04SHong Zhang Options Database Keys: 345b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 346b2b77a04SHong Zhang 347b2b77a04SHong Zhang Level: intermediate 348b2b77a04SHong Zhang 349b2b77a04SHong Zhang */ 350b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 351b2b77a04SHong Zhang { 352b2b77a04SHong Zhang PetscErrorCode ierr; 353b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 354b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 355b2b77a04SHong Zhang Mat_FFTW *fftw; 356b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 357b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 358b2b77a04SHong Zhang PetscBool flg; 359b2b77a04SHong Zhang PetscInt p_flag; 360b2b77a04SHong Zhang PetscMPIInt size; 361b2b77a04SHong Zhang 362b2b77a04SHong Zhang PetscFunctionBegin; 363b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 364b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 365b2b77a04SHong Zhang #endif 366b2b77a04SHong Zhang 367b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 368b2b77a04SHong Zhang //ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 369b2b77a04SHong Zhang 370b2b77a04SHong Zhang 371b2b77a04SHong Zhang if (size == 1) { 372b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 373b2b77a04SHong Zhang n = N; 374b2b77a04SHong Zhang } else { 375b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 376b2b77a04SHong Zhang switch (ndim){ 377b2b77a04SHong Zhang case 1: 378b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 379b2b77a04SHong Zhang break; 380b2b77a04SHong Zhang case 2: 381b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 382b2b77a04SHong Zhang /* 383b2b77a04SHong Zhang PetscMPIInt rank; 384b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 385b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 386b2b77a04SHong Zhang */ 387b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 388b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 389b2b77a04SHong Zhang break; 390b2b77a04SHong Zhang case 3: 391b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 392b2b77a04SHong Zhang break; 393b2b77a04SHong Zhang default: 394b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not implemented yet"); 395b2b77a04SHong Zhang break; 396b2b77a04SHong Zhang } 397b2b77a04SHong Zhang } 398b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 399b2b77a04SHong Zhang 400b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 401b2b77a04SHong Zhang fft->data = (void*)fftw; 402b2b77a04SHong Zhang 403b2b77a04SHong Zhang fft->n = n; 404b2b77a04SHong Zhang fftw->p_forward = 0; 405b2b77a04SHong Zhang fftw->p_backward = 0; 406b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 407b2b77a04SHong Zhang 408b2b77a04SHong Zhang if (size == 1){ 409b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 410b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 411b2b77a04SHong Zhang } else { 412b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 413b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 414b2b77a04SHong Zhang } 415b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 416b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 417b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 418b2b77a04SHong Zhang 419b2b77a04SHong Zhang /* get runtime options */ 420b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 421b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 422b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 423b2b77a04SHong Zhang PetscOptionsEnd(); 424b2b77a04SHong Zhang PetscFunctionReturn(0); 425b2b77a04SHong Zhang } 426b2b77a04SHong Zhang EXTERN_C_END 427