1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 14b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 15b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 16b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 17b2b77a04SHong Zhang } Mat_FFTW; 18b2b77a04SHong Zhang 19b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 20b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 24b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 26b2b77a04SHong Zhang 27b2b77a04SHong Zhang #undef __FUNCT__ 28b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 29b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 30b2b77a04SHong Zhang { 31b2b77a04SHong Zhang PetscErrorCode ierr; 32b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 33b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 34b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 35b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 36b2b77a04SHong Zhang 37b2b77a04SHong Zhang PetscFunctionBegin; 38b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 39b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 40b2b77a04SHong Zhang #endif 41b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 44b2b77a04SHong Zhang switch (ndim){ 45b2b77a04SHong Zhang case 1: 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47b2b77a04SHong Zhang break; 48b2b77a04SHong Zhang case 2: 49b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 3: 52b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 53b2b77a04SHong Zhang break; 54b2b77a04SHong Zhang default: 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56b2b77a04SHong Zhang break; 57b2b77a04SHong Zhang } 58b2b77a04SHong Zhang fftw->finarray = x_array; 59b2b77a04SHong Zhang fftw->foutarray = y_array; 60b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 61b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 62b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 63b2b77a04SHong Zhang } else { /* use existing plan */ 64b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 65b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 66b2b77a04SHong Zhang } else { 67b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 68b2b77a04SHong Zhang } 69b2b77a04SHong Zhang } 70b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 71b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 72b2b77a04SHong Zhang PetscFunctionReturn(0); 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang 75b2b77a04SHong Zhang #undef __FUNCT__ 76b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 77b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 78b2b77a04SHong Zhang { 79b2b77a04SHong Zhang PetscErrorCode ierr; 80b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 81b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 82b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 83b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 84b2b77a04SHong Zhang 85b2b77a04SHong Zhang PetscFunctionBegin; 86b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 87b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 88b2b77a04SHong Zhang #endif 89b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 90b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 91b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 92b2b77a04SHong Zhang switch (ndim){ 93b2b77a04SHong Zhang case 1: 94b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 95b2b77a04SHong Zhang break; 96b2b77a04SHong Zhang case 2: 97b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 98b2b77a04SHong Zhang break; 99b2b77a04SHong Zhang case 3: 100b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 101b2b77a04SHong Zhang break; 102b2b77a04SHong Zhang default: 103b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 104b2b77a04SHong Zhang break; 105b2b77a04SHong Zhang } 106b2b77a04SHong Zhang fftw->binarray = x_array; 107b2b77a04SHong Zhang fftw->boutarray = y_array; 108b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 109b2b77a04SHong Zhang } else { /* use existing plan */ 110b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 111b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 112b2b77a04SHong Zhang } else { 113b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 114b2b77a04SHong Zhang } 115b2b77a04SHong Zhang } 116b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 117b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang PetscFunctionReturn(0); 119b2b77a04SHong Zhang } 120b2b77a04SHong Zhang 121b2b77a04SHong Zhang #undef __FUNCT__ 122b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 123b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 124b2b77a04SHong Zhang { 125b2b77a04SHong Zhang PetscErrorCode ierr; 126b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 127b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 128b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 129b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 130b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 131b2b77a04SHong Zhang 132b2b77a04SHong Zhang PetscFunctionBegin; 133b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 135b2b77a04SHong Zhang #endif 136b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 137b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 138b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 139b2b77a04SHong Zhang switch (ndim){ 140b2b77a04SHong Zhang case 1: 141b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 142b2b77a04SHong Zhang break; 143b2b77a04SHong Zhang case 2: 144b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang case 3: 147b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 148b2b77a04SHong Zhang break; 149b2b77a04SHong Zhang default: 150*6882372aSHong Zhang fftw->p_forward = fftw_mpi_plan_dft(ndim,(const ptrdiff_t*)dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 151b2b77a04SHong Zhang break; 152b2b77a04SHong Zhang } 153b2b77a04SHong Zhang fftw->finarray = x_array; 154b2b77a04SHong Zhang fftw->foutarray = y_array; 155b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 156b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 157b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 158b2b77a04SHong Zhang } else { /* use existing plan */ 159b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 160b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 161b2b77a04SHong Zhang } else { 162b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 163b2b77a04SHong Zhang } 164b2b77a04SHong Zhang } 165b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 166b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 167b2b77a04SHong Zhang PetscFunctionReturn(0); 168b2b77a04SHong Zhang } 169b2b77a04SHong Zhang 170b2b77a04SHong Zhang #undef __FUNCT__ 171b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 172b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 173b2b77a04SHong Zhang { 174b2b77a04SHong Zhang PetscErrorCode ierr; 175b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 176b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 177b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 178b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 179b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 180b2b77a04SHong Zhang 181b2b77a04SHong Zhang PetscFunctionBegin; 182b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 183b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 184b2b77a04SHong Zhang #endif 185b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 186b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 187b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 188b2b77a04SHong Zhang switch (ndim){ 189b2b77a04SHong Zhang case 1: 190b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 191b2b77a04SHong Zhang break; 192b2b77a04SHong Zhang case 2: 193b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 194b2b77a04SHong Zhang break; 195b2b77a04SHong Zhang case 3: 196b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 197b2b77a04SHong Zhang break; 198b2b77a04SHong Zhang default: 199*6882372aSHong Zhang fftw->p_backward = fftw_mpi_plan_dft(ndim,(const ptrdiff_t*)dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang } 202b2b77a04SHong Zhang fftw->binarray = x_array; 203b2b77a04SHong Zhang fftw->boutarray = y_array; 204b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 205b2b77a04SHong Zhang } else { /* use existing plan */ 206b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 207b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 208b2b77a04SHong Zhang } else { 209b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 210b2b77a04SHong Zhang } 211b2b77a04SHong Zhang } 212b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 213b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 214b2b77a04SHong Zhang PetscFunctionReturn(0); 215b2b77a04SHong Zhang } 216b2b77a04SHong Zhang 217b2b77a04SHong Zhang #undef __FUNCT__ 218b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 219b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 220b2b77a04SHong Zhang { 221b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 222b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 223b2b77a04SHong Zhang PetscErrorCode ierr; 224b2b77a04SHong Zhang 225b2b77a04SHong Zhang PetscFunctionBegin; 226b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 227b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 228b2b77a04SHong Zhang #endif 229b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 230b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 231bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 232b2b77a04SHong Zhang PetscFunctionReturn(0); 233b2b77a04SHong Zhang } 234b2b77a04SHong Zhang 235c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 236b2b77a04SHong Zhang #undef __FUNCT__ 237b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 238b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 239b2b77a04SHong Zhang { 240b2b77a04SHong Zhang PetscErrorCode ierr; 241b2b77a04SHong Zhang PetscScalar *array; 242b2b77a04SHong Zhang 243b2b77a04SHong Zhang PetscFunctionBegin; 244b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 245b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 246b2b77a04SHong Zhang #endif 247b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 248b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 249b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 250b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 251b2b77a04SHong Zhang PetscFunctionReturn(0); 252b2b77a04SHong Zhang } 253b2b77a04SHong Zhang 254b2b77a04SHong Zhang #undef __FUNCT__ 255b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 256b2b77a04SHong Zhang /* 257b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 258b2b77a04SHong Zhang parallel layout determined by FFTW 259b2b77a04SHong Zhang 260b2b77a04SHong Zhang Collective on Mat 261b2b77a04SHong Zhang 262b2b77a04SHong Zhang Input Parameter: 263b2b77a04SHong Zhang . mat - the matrix 264b2b77a04SHong Zhang 265b2b77a04SHong Zhang Output Parameter: 266b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 267b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 268b2b77a04SHong Zhang 269b2b77a04SHong Zhang Level: advanced 270b2b77a04SHong Zhang 271b2b77a04SHong Zhang .seealso: MatCreateFFTW() 272b2b77a04SHong Zhang */ 273b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 274b2b77a04SHong Zhang { 275b2b77a04SHong Zhang PetscErrorCode ierr; 276b2b77a04SHong Zhang PetscMPIInt size,rank; 277b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 278b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 279b2b77a04SHong Zhang PetscInt N=fft->N; 280b2b77a04SHong Zhang 281b2b77a04SHong Zhang PetscFunctionBegin; 282b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 283b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 284b2b77a04SHong Zhang #endif 285b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 286b2b77a04SHong Zhang PetscValidType(A,1); 287b2b77a04SHong Zhang 288b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 289b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 290b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 291b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 292b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 293b2b77a04SHong Zhang } else { /* mpi case */ 294b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 295*6882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 296b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 297b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 298b2b77a04SHong Zhang 299b2b77a04SHong Zhang switch (ndim){ 300b2b77a04SHong Zhang case 1: 301*6882372aSHong Zhang /* Get local size */ 302*6882372aSHong Zhang 303*6882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 304*6882372aSHong Zhang if (fin) { 305*6882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 306*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 307*6882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 308*6882372aSHong Zhang } 309*6882372aSHong Zhang if (fout) { 310*6882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 311*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 312*6882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 313*6882372aSHong Zhang } 314b2b77a04SHong Zhang break; 315b2b77a04SHong Zhang case 2: 316b2b77a04SHong Zhang /* Get local size */ 317b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 318b2b77a04SHong Zhang if (fin) { 319b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 320b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 321b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 322b2b77a04SHong Zhang } 323b2b77a04SHong Zhang if (fout) { 324b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 325b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 326b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 327b2b77a04SHong Zhang } 328b2b77a04SHong Zhang break; 329b2b77a04SHong Zhang case 3: 330*6882372aSHong Zhang /* Get local size */ 331*6882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 332*6882372aSHong Zhang if (fin) { 333*6882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 334*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 335*6882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 336*6882372aSHong Zhang } 337*6882372aSHong Zhang if (fout) { 338*6882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 339*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 340*6882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 341*6882372aSHong Zhang } 342b2b77a04SHong Zhang break; 343b2b77a04SHong Zhang default: 344*6882372aSHong Zhang /* Get local size */ 345*6882372aSHong Zhang alloc_local = fftw_mpi_local_size(ndim,(const ptrdiff_t*)dim,comm,&local_n0,&local_0_start); 346*6882372aSHong Zhang if (fin) { 347*6882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 348*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 349*6882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 350*6882372aSHong Zhang } 351*6882372aSHong Zhang if (fout) { 352*6882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 353*6882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 354*6882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 355*6882372aSHong Zhang } 356b2b77a04SHong Zhang break; 357b2b77a04SHong Zhang } 358b2b77a04SHong Zhang } 359fb7c10e0SHong Zhang if (fin){ 360262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 361fb7c10e0SHong Zhang } 362fb7c10e0SHong Zhang if (fout){ 363262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 364fb7c10e0SHong Zhang } 365b2b77a04SHong Zhang PetscFunctionReturn(0); 366b2b77a04SHong Zhang } 367b2b77a04SHong Zhang 368b2b77a04SHong Zhang EXTERN_C_BEGIN 369b2b77a04SHong Zhang #undef __FUNCT__ 370b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 371b2b77a04SHong Zhang /* 372b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 373b2b77a04SHong Zhang via the external package FFTW 374b2b77a04SHong Zhang 375b2b77a04SHong Zhang Options Database Keys: 376b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 377b2b77a04SHong Zhang 378b2b77a04SHong Zhang Level: intermediate 379b2b77a04SHong Zhang 380b2b77a04SHong Zhang */ 381b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 382b2b77a04SHong Zhang { 383b2b77a04SHong Zhang PetscErrorCode ierr; 384b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 385b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 386b2b77a04SHong Zhang Mat_FFTW *fftw; 387b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 388b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 389b2b77a04SHong Zhang PetscBool flg; 390*6882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 391b2b77a04SHong Zhang PetscMPIInt size; 392b2b77a04SHong Zhang 393b2b77a04SHong Zhang PetscFunctionBegin; 394b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 395b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 396b2b77a04SHong Zhang #endif 397b2b77a04SHong Zhang 398b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 399*6882372aSHong Zhang for(ctr=1;ctr<ndim;ctr++){ 400*6882372aSHong Zhang partial_dim*=dim[ctr]; 401*6882372aSHong Zhang } 402b2b77a04SHong Zhang if (size == 1) { 403b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 404b2b77a04SHong Zhang n = N; 405b2b77a04SHong Zhang } else { 406*6882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 407b2b77a04SHong Zhang switch (ndim){ 408b2b77a04SHong Zhang case 1: 409*6882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 410*6882372aSHong Zhang n = (PetscInt)local_n0; 411*6882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 412*6882372aSHong Zhang 413b2b77a04SHong Zhang break; 414b2b77a04SHong Zhang case 2: 415b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 416b2b77a04SHong Zhang /* 417b2b77a04SHong Zhang PetscMPIInt rank; 418b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 419b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 420b2b77a04SHong Zhang */ 421b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 422b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 423b2b77a04SHong Zhang break; 424b2b77a04SHong Zhang case 3: 425*6882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 426*6882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 427*6882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 428b2b77a04SHong Zhang break; 429b2b77a04SHong Zhang default: 430*6882372aSHong Zhang alloc_local = fftw_mpi_local_size(ndim,(const ptrdiff_t*)dim,comm,&local_n0,&local_0_start); 431*6882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 432*6882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 433b2b77a04SHong Zhang break; 434b2b77a04SHong Zhang } 435b2b77a04SHong Zhang } 436b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 437b2b77a04SHong Zhang 438b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 439b2b77a04SHong Zhang fft->data = (void*)fftw; 440b2b77a04SHong Zhang 441b2b77a04SHong Zhang fft->n = n; 442b2b77a04SHong Zhang fftw->p_forward = 0; 443b2b77a04SHong Zhang fftw->p_backward = 0; 444b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 445b2b77a04SHong Zhang 446b2b77a04SHong Zhang if (size == 1){ 447b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 448b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 449b2b77a04SHong Zhang } else { 450b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 451b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 452b2b77a04SHong Zhang } 453b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 454b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 455b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 456b2b77a04SHong Zhang 457b2b77a04SHong Zhang /* get runtime options */ 458b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 459b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 460b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 461b2b77a04SHong Zhang PetscOptionsEnd(); 462b2b77a04SHong Zhang PetscFunctionReturn(0); 463b2b77a04SHong Zhang } 464b2b77a04SHong Zhang EXTERN_C_END 465