1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 14b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 15b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 16b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 17b2b77a04SHong Zhang } Mat_FFTW; 18b2b77a04SHong Zhang 19b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 20b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 24b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 26b2b77a04SHong Zhang 27b2b77a04SHong Zhang #undef __FUNCT__ 28b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 29b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 30b2b77a04SHong Zhang { 31b2b77a04SHong Zhang PetscErrorCode ierr; 32b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 33b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 34b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 35b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 36b2b77a04SHong Zhang 37b2b77a04SHong Zhang PetscFunctionBegin; 38b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 39b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 40b2b77a04SHong Zhang #endif 41b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 42b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 43b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 44b2b77a04SHong Zhang switch (ndim){ 45b2b77a04SHong Zhang case 1: 46b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47b2b77a04SHong Zhang break; 48b2b77a04SHong Zhang case 2: 49b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 50b2b77a04SHong Zhang break; 51b2b77a04SHong Zhang case 3: 52b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 53b2b77a04SHong Zhang break; 54b2b77a04SHong Zhang default: 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56b2b77a04SHong Zhang break; 57b2b77a04SHong Zhang } 58b2b77a04SHong Zhang fftw->finarray = x_array; 59b2b77a04SHong Zhang fftw->foutarray = y_array; 60b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 61b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 62b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 63b2b77a04SHong Zhang } else { /* use existing plan */ 64b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 65b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 66b2b77a04SHong Zhang } else { 67b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 68b2b77a04SHong Zhang } 69b2b77a04SHong Zhang } 70b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 71b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 72b2b77a04SHong Zhang PetscFunctionReturn(0); 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang 75b2b77a04SHong Zhang #undef __FUNCT__ 76b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 77b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 78b2b77a04SHong Zhang { 79b2b77a04SHong Zhang PetscErrorCode ierr; 80b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 81b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 82b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 83b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 84b2b77a04SHong Zhang 85b2b77a04SHong Zhang PetscFunctionBegin; 86b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 87b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 88b2b77a04SHong Zhang #endif 89b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 90b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 91b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 92b2b77a04SHong Zhang switch (ndim){ 93b2b77a04SHong Zhang case 1: 94b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 95b2b77a04SHong Zhang break; 96b2b77a04SHong Zhang case 2: 97b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 98b2b77a04SHong Zhang break; 99b2b77a04SHong Zhang case 3: 100b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 101b2b77a04SHong Zhang break; 102b2b77a04SHong Zhang default: 103b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 104b2b77a04SHong Zhang break; 105b2b77a04SHong Zhang } 106b2b77a04SHong Zhang fftw->binarray = x_array; 107b2b77a04SHong Zhang fftw->boutarray = y_array; 108b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 109b2b77a04SHong Zhang } else { /* use existing plan */ 110b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 111b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 112b2b77a04SHong Zhang } else { 113b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 114b2b77a04SHong Zhang } 115b2b77a04SHong Zhang } 116b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 117b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 118b2b77a04SHong Zhang PetscFunctionReturn(0); 119b2b77a04SHong Zhang } 120b2b77a04SHong Zhang 121b2b77a04SHong Zhang #undef __FUNCT__ 122b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 123b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 124b2b77a04SHong Zhang { 125b2b77a04SHong Zhang PetscErrorCode ierr; 126b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 127b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 128b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 129*11902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,ctr; 130b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 131*11902ff2SHong Zhang ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 132b2b77a04SHong Zhang 133b2b77a04SHong Zhang PetscFunctionBegin; 134b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 135b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 136b2b77a04SHong Zhang #endif 137*11902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 138*11902ff2SHong Zhang for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 139*11902ff2SHong Zhang 140b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 141b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 142b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 143b2b77a04SHong Zhang switch (ndim){ 144b2b77a04SHong Zhang case 1: 145b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 146b2b77a04SHong Zhang break; 147b2b77a04SHong Zhang case 2: 148b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 149b2b77a04SHong Zhang break; 150b2b77a04SHong Zhang case 3: 151b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 152b2b77a04SHong Zhang break; 153b2b77a04SHong Zhang default: 154*11902ff2SHong Zhang fftw->p_forward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 155*11902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 156b2b77a04SHong Zhang break; 157b2b77a04SHong Zhang } 158b2b77a04SHong Zhang fftw->finarray = x_array; 159b2b77a04SHong Zhang fftw->foutarray = y_array; 160b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 161b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 162b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 163b2b77a04SHong Zhang } else { /* use existing plan */ 164b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 165b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 166b2b77a04SHong Zhang } else { 167b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 168b2b77a04SHong Zhang } 169b2b77a04SHong Zhang } 170b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 171b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 172b2b77a04SHong Zhang PetscFunctionReturn(0); 173b2b77a04SHong Zhang } 174b2b77a04SHong Zhang 175b2b77a04SHong Zhang #undef __FUNCT__ 176b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 177b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 178b2b77a04SHong Zhang { 179b2b77a04SHong Zhang PetscErrorCode ierr; 180b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 181b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 182b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 183*11902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,ctr; 184b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 185*11902ff2SHong Zhang ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 186b2b77a04SHong Zhang 187b2b77a04SHong Zhang PetscFunctionBegin; 188b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 189b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 190b2b77a04SHong Zhang #endif 191*11902ff2SHong Zhang ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); // should pdim be a member of Mat_FFTW? 192*11902ff2SHong Zhang for(ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 193*11902ff2SHong Zhang 194b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 195b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 196b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 197b2b77a04SHong Zhang switch (ndim){ 198b2b77a04SHong Zhang case 1: 199b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang case 2: 202b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 203b2b77a04SHong Zhang break; 204b2b77a04SHong Zhang case 3: 205b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 206b2b77a04SHong Zhang break; 207b2b77a04SHong Zhang default: 208*11902ff2SHong Zhang fftw->p_backward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang } 211b2b77a04SHong Zhang fftw->binarray = x_array; 212b2b77a04SHong Zhang fftw->boutarray = y_array; 213b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 214b2b77a04SHong Zhang } else { /* use existing plan */ 215b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 216b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 217b2b77a04SHong Zhang } else { 218b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang } 221b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 222b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 223*11902ff2SHong Zhang ierr = PetscFree(pdim);CHKERRQ(ierr); 224b2b77a04SHong Zhang PetscFunctionReturn(0); 225b2b77a04SHong Zhang } 226b2b77a04SHong Zhang 227b2b77a04SHong Zhang #undef __FUNCT__ 228b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 229b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 230b2b77a04SHong Zhang { 231b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 232b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 233b2b77a04SHong Zhang PetscErrorCode ierr; 234b2b77a04SHong Zhang 235b2b77a04SHong Zhang PetscFunctionBegin; 236b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 237b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 238b2b77a04SHong Zhang #endif 239b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 240b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 241bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 242b2b77a04SHong Zhang PetscFunctionReturn(0); 243b2b77a04SHong Zhang } 244b2b77a04SHong Zhang 245c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 246b2b77a04SHong Zhang #undef __FUNCT__ 247b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 248b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 249b2b77a04SHong Zhang { 250b2b77a04SHong Zhang PetscErrorCode ierr; 251b2b77a04SHong Zhang PetscScalar *array; 252b2b77a04SHong Zhang 253b2b77a04SHong Zhang PetscFunctionBegin; 254b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 255b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 256b2b77a04SHong Zhang #endif 257b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 258b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 259b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 260b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 261b2b77a04SHong Zhang PetscFunctionReturn(0); 262b2b77a04SHong Zhang } 263b2b77a04SHong Zhang 264b2b77a04SHong Zhang #undef __FUNCT__ 265b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 266b2b77a04SHong Zhang /* 267b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 268b2b77a04SHong Zhang parallel layout determined by FFTW 269b2b77a04SHong Zhang 270b2b77a04SHong Zhang Collective on Mat 271b2b77a04SHong Zhang 272b2b77a04SHong Zhang Input Parameter: 273b2b77a04SHong Zhang . mat - the matrix 274b2b77a04SHong Zhang 275b2b77a04SHong Zhang Output Parameter: 276b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 277b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 278b2b77a04SHong Zhang 279b2b77a04SHong Zhang Level: advanced 280b2b77a04SHong Zhang 281b2b77a04SHong Zhang .seealso: MatCreateFFTW() 282b2b77a04SHong Zhang */ 283b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 284b2b77a04SHong Zhang { 285b2b77a04SHong Zhang PetscErrorCode ierr; 286b2b77a04SHong Zhang PetscMPIInt size,rank; 287b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 288b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 289b2b77a04SHong Zhang PetscInt N=fft->N; 290b2b77a04SHong Zhang 291b2b77a04SHong Zhang PetscFunctionBegin; 292b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 293b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 294b2b77a04SHong Zhang #endif 295b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 296b2b77a04SHong Zhang PetscValidType(A,1); 297b2b77a04SHong Zhang 298b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 300b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 301b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 302b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 303b2b77a04SHong Zhang } else { /* mpi case */ 304b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 3056882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 306*11902ff2SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n,ctr; 307b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 308*11902ff2SHong Zhang ptrdiff_t ndim1,*pdim; 309*11902ff2SHong Zhang ndim1=(ptrdiff_t) ndim; 310*11902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 311*11902ff2SHong Zhang 312*11902ff2SHong Zhang for(ctr=0;ctr<ndim;ctr++) 313*11902ff2SHong Zhang { 314*11902ff2SHong Zhang pdim[ctr] = dim[ctr]; 315*11902ff2SHong Zhang } 316b2b77a04SHong Zhang 317b2b77a04SHong Zhang switch (ndim){ 318b2b77a04SHong Zhang case 1: 3196882372aSHong Zhang /* Get local size */ 3206882372aSHong Zhang 3216882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 3226882372aSHong Zhang if (fin) { 3236882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3246882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3256882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3266882372aSHong Zhang } 3276882372aSHong Zhang if (fout) { 3286882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3296882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3306882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3316882372aSHong Zhang } 332b2b77a04SHong Zhang break; 333b2b77a04SHong Zhang case 2: 334b2b77a04SHong Zhang /* Get local size */ 335b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 336b2b77a04SHong Zhang if (fin) { 337b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 338b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 339b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 340b2b77a04SHong Zhang } 341b2b77a04SHong Zhang if (fout) { 342b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 343b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 344b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 345b2b77a04SHong Zhang } 346b2b77a04SHong Zhang break; 347b2b77a04SHong Zhang case 3: 3486882372aSHong Zhang /* Get local size */ 3496882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 350*11902ff2SHong Zhang // printf("The quantity n is %d",n); 3516882372aSHong Zhang if (fin) { 3526882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3536882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3546882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3556882372aSHong Zhang } 3566882372aSHong Zhang if (fout) { 3576882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3586882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3596882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3606882372aSHong Zhang } 361b2b77a04SHong Zhang break; 362b2b77a04SHong Zhang default: 3636882372aSHong Zhang /* Get local size */ 364*11902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim1,pdim,comm,&local_n0,&local_0_start); 365*11902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 366*11902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 367*11902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 368*11902ff2SHong Zhang // for(i=0;i<ndim;i++) 369*11902ff2SHong Zhang // { 370*11902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 371*11902ff2SHong Zhang // } 372*11902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 373*11902ff2SHong Zhang // printf("The quantity n is %d",n); 3746882372aSHong Zhang if (fin) { 3756882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3766882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 3776882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 3786882372aSHong Zhang } 3796882372aSHong Zhang if (fout) { 3806882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 3816882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 3826882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 3836882372aSHong Zhang } 384b2b77a04SHong Zhang break; 385b2b77a04SHong Zhang } 386b2b77a04SHong Zhang } 387fb7c10e0SHong Zhang if (fin){ 388262f75f9SHong Zhang ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 389fb7c10e0SHong Zhang } 390fb7c10e0SHong Zhang if (fout){ 391262f75f9SHong Zhang ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 392fb7c10e0SHong Zhang } 393b2b77a04SHong Zhang PetscFunctionReturn(0); 394b2b77a04SHong Zhang } 395b2b77a04SHong Zhang 396b2b77a04SHong Zhang EXTERN_C_BEGIN 397b2b77a04SHong Zhang #undef __FUNCT__ 398b2b77a04SHong Zhang #define __FUNCT__ "MatCreate_FFTW" 399b2b77a04SHong Zhang /* 400b2b77a04SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT 401b2b77a04SHong Zhang via the external package FFTW 402b2b77a04SHong Zhang 403b2b77a04SHong Zhang Options Database Keys: 404b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 405b2b77a04SHong Zhang 406b2b77a04SHong Zhang Level: intermediate 407b2b77a04SHong Zhang 408b2b77a04SHong Zhang */ 409b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 410b2b77a04SHong Zhang { 411b2b77a04SHong Zhang PetscErrorCode ierr; 412b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 413b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 414b2b77a04SHong Zhang Mat_FFTW *fftw; 415b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 416b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 417b2b77a04SHong Zhang PetscBool flg; 4186882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 419*11902ff2SHong Zhang PetscMPIInt size,rank; 420*11902ff2SHong Zhang ptrdiff_t *pdim; 421b2b77a04SHong Zhang 422b2b77a04SHong Zhang PetscFunctionBegin; 423b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 424b2b77a04SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 425b2b77a04SHong Zhang #endif 426b2b77a04SHong Zhang 427b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 428*11902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 429*11902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 430*11902ff2SHong Zhang pdim[0] = dim[0]; 431*11902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 432*11902ff2SHong Zhang { 4336882372aSHong Zhang partial_dim*=dim[ctr]; 434*11902ff2SHong Zhang pdim[ctr] = dim[ctr]; 4356882372aSHong Zhang } 436*11902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 437b2b77a04SHong Zhang if (size == 1) { 438b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 439b2b77a04SHong Zhang n = N; 440b2b77a04SHong Zhang } else { 4416882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 442b2b77a04SHong Zhang switch (ndim){ 443b2b77a04SHong Zhang case 1: 4446882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4456882372aSHong Zhang n = (PetscInt)local_n0; 4466882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 4476882372aSHong Zhang 448b2b77a04SHong Zhang break; 449b2b77a04SHong Zhang case 2: 450b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 451b2b77a04SHong Zhang /* 452b2b77a04SHong Zhang PetscMPIInt rank; 453b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 454b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 455b2b77a04SHong Zhang */ 456b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 457b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 458b2b77a04SHong Zhang break; 459b2b77a04SHong Zhang case 3: 4606882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 461*11902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 4626882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 4636882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 464b2b77a04SHong Zhang break; 465b2b77a04SHong Zhang default: 466*11902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 467*11902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 468*11902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 4696882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 470*11902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 4716882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 472b2b77a04SHong Zhang break; 473b2b77a04SHong Zhang } 474b2b77a04SHong Zhang } 475b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 476b2b77a04SHong Zhang 477b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 478b2b77a04SHong Zhang fft->data = (void*)fftw; 479b2b77a04SHong Zhang 480b2b77a04SHong Zhang fft->n = n; 481b2b77a04SHong Zhang fftw->p_forward = 0; 482b2b77a04SHong Zhang fftw->p_backward = 0; 483b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 484b2b77a04SHong Zhang 485b2b77a04SHong Zhang if (size == 1){ 486b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 487b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 488b2b77a04SHong Zhang } else { 489b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 490b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 491b2b77a04SHong Zhang } 492b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 493b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 494b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 495b2b77a04SHong Zhang 496b2b77a04SHong Zhang /* get runtime options */ 497b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 498b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 499b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 500b2b77a04SHong Zhang PetscOptionsEnd(); 501b2b77a04SHong Zhang PetscFunctionReturn(0); 502b2b77a04SHong Zhang } 503b2b77a04SHong Zhang EXTERN_C_END 504