1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 15b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 18b2b77a04SHong Zhang } Mat_FFTW; 19b2b77a04SHong Zhang 20b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 25b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27*5b3e41ffSAmlan Barua extern PetscErrorCode InputTransformFFT_FFTW(Mat,Vec,Vec); 28*5b3e41ffSAmlan Barua extern PetscErrorCode InputTransformFFT(Mat,Vec,Vec); 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang #undef __FUNCT__ 31b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 32b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 33b2b77a04SHong Zhang { 34b2b77a04SHong Zhang PetscErrorCode ierr; 35b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 36b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 37b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 38b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 39b2b77a04SHong Zhang 40b2b77a04SHong Zhang PetscFunctionBegin; 41b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 42b9ae062cSHong Zhang 43b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 44b2b77a04SHong Zhang #endif 45b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 46b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 47b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 48b2b77a04SHong Zhang switch (ndim){ 49b2b77a04SHong Zhang case 1: 50b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 51b2b77a04SHong Zhang break; 52b2b77a04SHong Zhang case 2: 53b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54b2b77a04SHong Zhang break; 55b2b77a04SHong Zhang case 3: 56b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 57b2b77a04SHong Zhang break; 58b2b77a04SHong Zhang default: 59b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 60b2b77a04SHong Zhang break; 61b2b77a04SHong Zhang } 62b2b77a04SHong Zhang fftw->finarray = x_array; 63b2b77a04SHong Zhang fftw->foutarray = y_array; 64b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 65b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 66b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 67b2b77a04SHong Zhang } else { /* use existing plan */ 68b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 69b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 70b2b77a04SHong Zhang } else { 71b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 72b2b77a04SHong Zhang } 73b2b77a04SHong Zhang } 74b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 75b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 76b2b77a04SHong Zhang PetscFunctionReturn(0); 77b2b77a04SHong Zhang } 78b2b77a04SHong Zhang 79b2b77a04SHong Zhang #undef __FUNCT__ 80b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 81b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 82b2b77a04SHong Zhang { 83b2b77a04SHong Zhang PetscErrorCode ierr; 84b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 85b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 86b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 87b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 88b2b77a04SHong Zhang 89b2b77a04SHong Zhang PetscFunctionBegin; 90b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 91b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 92b2b77a04SHong Zhang #endif 93b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 94b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 95b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 96b2b77a04SHong Zhang switch (ndim){ 97b2b77a04SHong Zhang case 1: 98b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 99b2b77a04SHong Zhang break; 100b2b77a04SHong Zhang case 2: 101b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 102b2b77a04SHong Zhang break; 103b2b77a04SHong Zhang case 3: 104b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 105b2b77a04SHong Zhang break; 106b2b77a04SHong Zhang default: 107b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108b2b77a04SHong Zhang break; 109b2b77a04SHong Zhang } 110b2b77a04SHong Zhang fftw->binarray = x_array; 111b2b77a04SHong Zhang fftw->boutarray = y_array; 112b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 113b2b77a04SHong Zhang } else { /* use existing plan */ 114b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 115b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 116b2b77a04SHong Zhang } else { 117b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 118b2b77a04SHong Zhang } 119b2b77a04SHong Zhang } 120b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 121b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 122b2b77a04SHong Zhang PetscFunctionReturn(0); 123b2b77a04SHong Zhang } 124b2b77a04SHong Zhang 125b2b77a04SHong Zhang #undef __FUNCT__ 126b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 127b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 128b2b77a04SHong Zhang { 129b2b77a04SHong Zhang PetscErrorCode ierr; 130b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 131b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 132b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 133c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 134b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 135c92418ccSAmlan Barua // PetscInt ctr; 136c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 137c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 138c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 139c92418ccSAmlan Barua 140c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 141c92418ccSAmlan Barua // { 142c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 143c92418ccSAmlan Barua // } 144b2b77a04SHong Zhang 145b2b77a04SHong Zhang PetscFunctionBegin; 146*5b3e41ffSAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 147*5b3e41ffSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 148*5b3e41ffSAmlan Barua //#endif 149c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 150c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 15111902ff2SHong Zhang 152b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 153b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 154b2b77a04SHong Zhang if (!fftw->p_forward){ /* create a plan, then excute it */ 155b2b77a04SHong Zhang switch (ndim){ 156b2b77a04SHong Zhang case 1: 1573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 158b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1593c3a9c75SAmlan Barua #endif 160b2b77a04SHong Zhang break; 161b2b77a04SHong Zhang case 2: 1623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 163b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1643c3a9c75SAmlan Barua #else 165*5b3e41ffSAmlan Barua printf("The code comes here \n"); 1663c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1673c3a9c75SAmlan Barua #endif 168b2b77a04SHong Zhang break; 169b2b77a04SHong Zhang case 3: 1703c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 171b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1723c3a9c75SAmlan Barua #else 1733c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1743c3a9c75SAmlan Barua #endif 175b2b77a04SHong Zhang break; 176b2b77a04SHong Zhang default: 1773c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 178c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 1793c3a9c75SAmlan Barua #else 1803c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 1813c3a9c75SAmlan Barua #endif 18211902ff2SHong Zhang // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 183b2b77a04SHong Zhang break; 184b2b77a04SHong Zhang } 185b2b77a04SHong Zhang fftw->finarray = x_array; 186b2b77a04SHong Zhang fftw->foutarray = y_array; 187b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 188b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 189b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 190b2b77a04SHong Zhang } else { /* use existing plan */ 191b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 192b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 193b2b77a04SHong Zhang } else { 194b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 195b2b77a04SHong Zhang } 196b2b77a04SHong Zhang } 197b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 198b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 199b2b77a04SHong Zhang PetscFunctionReturn(0); 200b2b77a04SHong Zhang } 201b2b77a04SHong Zhang 202b2b77a04SHong Zhang #undef __FUNCT__ 203b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 204b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 205b2b77a04SHong Zhang { 206b2b77a04SHong Zhang PetscErrorCode ierr; 207b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 208b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 209b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 210c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 211b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 212c92418ccSAmlan Barua // PetscInt ctr; 213c92418ccSAmlan Barua // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 214c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 215c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 216c92418ccSAmlan Barua 217c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 218c92418ccSAmlan Barua // { 219c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 220c92418ccSAmlan Barua // } 221b2b77a04SHong Zhang 222b2b77a04SHong Zhang PetscFunctionBegin; 2233c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 2243c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 2253c3a9c75SAmlan Barua //#endif 226c92418ccSAmlan Barua // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 227c92418ccSAmlan Barua // should pdim be a member of Mat_FFTW? 228c92418ccSAmlan Barua // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 22911902ff2SHong Zhang 230b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 231b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 232b2b77a04SHong Zhang if (!fftw->p_backward){ /* create a plan, then excute it */ 233b2b77a04SHong Zhang switch (ndim){ 234b2b77a04SHong Zhang case 1: 2353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 236b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2373c3a9c75SAmlan Barua #endif 238b2b77a04SHong Zhang break; 239b2b77a04SHong Zhang case 2: 2403c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 241b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2423c3a9c75SAmlan Barua #else 2433c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2443c3a9c75SAmlan Barua #endif 245b2b77a04SHong Zhang break; 246b2b77a04SHong Zhang case 3: 2473c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 248b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2493c3a9c75SAmlan Barua #else 2503c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2513c3a9c75SAmlan Barua #endif 252b2b77a04SHong Zhang break; 253b2b77a04SHong Zhang default: 2543c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 255c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2563c3a9c75SAmlan Barua #else 2573c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 2583c3a9c75SAmlan Barua #endif 259c92418ccSAmlan Barua // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 260b2b77a04SHong Zhang break; 261b2b77a04SHong Zhang } 262b2b77a04SHong Zhang fftw->binarray = x_array; 263b2b77a04SHong Zhang fftw->boutarray = y_array; 264b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 265b2b77a04SHong Zhang } else { /* use existing plan */ 266b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 267b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 268b2b77a04SHong Zhang } else { 269b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 270b2b77a04SHong Zhang } 271b2b77a04SHong Zhang } 272b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 273b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 274b2b77a04SHong Zhang PetscFunctionReturn(0); 275b2b77a04SHong Zhang } 276b2b77a04SHong Zhang 277b2b77a04SHong Zhang #undef __FUNCT__ 278b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 279b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 280b2b77a04SHong Zhang { 281b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 282b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 283b2b77a04SHong Zhang PetscErrorCode ierr; 284b2b77a04SHong Zhang 285b2b77a04SHong Zhang PetscFunctionBegin; 286b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 287b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 288b2b77a04SHong Zhang #endif 289b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 290b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 291b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 292bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 293b2b77a04SHong Zhang PetscFunctionReturn(0); 294b2b77a04SHong Zhang } 295b2b77a04SHong Zhang 296c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 297b2b77a04SHong Zhang #undef __FUNCT__ 298b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 299b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 300b2b77a04SHong Zhang { 301b2b77a04SHong Zhang PetscErrorCode ierr; 302b2b77a04SHong Zhang PetscScalar *array; 303b2b77a04SHong Zhang 304b2b77a04SHong Zhang PetscFunctionBegin; 305b2b77a04SHong Zhang #if !defined(PETSC_USE_COMPLEX) 306b2b77a04SHong Zhang SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 307b2b77a04SHong Zhang #endif 308b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 309b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 310b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 311b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 312b2b77a04SHong Zhang PetscFunctionReturn(0); 313b2b77a04SHong Zhang } 314b2b77a04SHong Zhang 315b2b77a04SHong Zhang #undef __FUNCT__ 3163c3a9c75SAmlan Barua #define __FUNCT__ "MatGetVecs1DC_FFTW" 317c92418ccSAmlan Barua /* 318c92418ccSAmlan Barua MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 319c92418ccSAmlan Barua parallel layout determined by FFTW-1D 320c92418ccSAmlan Barua 321c92418ccSAmlan Barua */ 322c92418ccSAmlan Barua PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 323c92418ccSAmlan Barua { 324c92418ccSAmlan Barua PetscErrorCode ierr; 325c92418ccSAmlan Barua PetscMPIInt size,rank; 326c92418ccSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 327c92418ccSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 328c92418ccSAmlan Barua // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 329c92418ccSAmlan Barua PetscInt N=fft->N; 330c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 331c92418ccSAmlan Barua ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 332c92418ccSAmlan Barua ptrdiff_t f_local_n1,f_local_1_end; 333c92418ccSAmlan Barua ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 334c92418ccSAmlan Barua ptrdiff_t b_local_n1,b_local_1_end; 335c92418ccSAmlan Barua fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 336c92418ccSAmlan Barua 337c92418ccSAmlan Barua PetscFunctionBegin; 338c92418ccSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 339c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 340c92418ccSAmlan Barua #endif 341c92418ccSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 342c92418ccSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 343c92418ccSAmlan Barua if (size == 1){ 344c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 345c92418ccSAmlan Barua } 346c92418ccSAmlan Barua else { 347c92418ccSAmlan Barua if (ndim>1){ 348c92418ccSAmlan Barua SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 349c92418ccSAmlan Barua else { 350c92418ccSAmlan Barua f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 351c92418ccSAmlan Barua b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 352c92418ccSAmlan Barua if (fin) { 353c92418ccSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 354c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 355c92418ccSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 356c92418ccSAmlan Barua } 357c92418ccSAmlan Barua if (fout) { 358c92418ccSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 359c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 360c92418ccSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 361c92418ccSAmlan Barua } 362c92418ccSAmlan Barua if (bin) { 363c92418ccSAmlan Barua data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 364c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 365c92418ccSAmlan Barua (*bin)->ops->destroy = VecDestroy_MPIFFTW; 366c92418ccSAmlan Barua } 367c92418ccSAmlan Barua if (bout) { 368c92418ccSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 369c92418ccSAmlan Barua ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 370c92418ccSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 371c92418ccSAmlan Barua } 372c92418ccSAmlan Barua } 373c92418ccSAmlan Barua if (fin){ 374c92418ccSAmlan Barua ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 375c92418ccSAmlan Barua } 376c92418ccSAmlan Barua if (fout){ 377c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 378c92418ccSAmlan Barua } 379c92418ccSAmlan Barua if (bin){ 380c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 381c92418ccSAmlan Barua } 382c92418ccSAmlan Barua if (bout){ 383c92418ccSAmlan Barua ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 384c92418ccSAmlan Barua } 385c92418ccSAmlan Barua PetscFunctionReturn(0); 386c92418ccSAmlan Barua } 387c92418ccSAmlan Barua 388c92418ccSAmlan Barua 389c92418ccSAmlan Barua } 3903c3a9c75SAmlan Barua 391c92418ccSAmlan Barua #undef __FUNCT__ 392b2b77a04SHong Zhang #define __FUNCT__ "MatGetVecs_FFTW" 393b2b77a04SHong Zhang /* 394b2b77a04SHong Zhang MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 395b2b77a04SHong Zhang parallel layout determined by FFTW 396b2b77a04SHong Zhang 397b2b77a04SHong Zhang Collective on Mat 398b2b77a04SHong Zhang 399b2b77a04SHong Zhang Input Parameter: 400b2b77a04SHong Zhang . mat - the matrix 401b2b77a04SHong Zhang 402b2b77a04SHong Zhang Output Parameter: 403b2b77a04SHong Zhang + fin - (optional) input vector of forward FFTW 404b2b77a04SHong Zhang - fout - (optional) output vector of forward FFTW 405b2b77a04SHong Zhang 406b2b77a04SHong Zhang Level: advanced 407b2b77a04SHong Zhang 408b2b77a04SHong Zhang .seealso: MatCreateFFTW() 409b2b77a04SHong Zhang */ 410b2b77a04SHong Zhang PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 411b2b77a04SHong Zhang { 412b2b77a04SHong Zhang PetscErrorCode ierr; 413b2b77a04SHong Zhang PetscMPIInt size,rank; 414b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 415b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 416c92418ccSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4173c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1,vsize; 418b2b77a04SHong Zhang 419b2b77a04SHong Zhang PetscFunctionBegin; 4203c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 4213c3a9c75SAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 4223c3a9c75SAmlan Barua //#endif 423b2b77a04SHong Zhang PetscValidHeaderSpecific(A,MAT_CLASSID,1); 424b2b77a04SHong Zhang PetscValidType(A,1); 425b2b77a04SHong Zhang 426b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 427b2b77a04SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 428b2b77a04SHong Zhang if (size == 1){ /* sequential case */ 429b2b77a04SHong Zhang if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 430b2b77a04SHong Zhang if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4313c3a9c75SAmlan Barua printf("The code successfully comes at the end of the routine with one processor\n"); 432b2b77a04SHong Zhang } else { /* mpi case */ 433b2b77a04SHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start; 4346882372aSHong Zhang ptrdiff_t local_n1,local_1_end; 435c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 436b2b77a04SHong Zhang fftw_complex *data_fin,*data_fout; 4373c3a9c75SAmlan Barua double *data_finr, *data_foutr; 4383c3a9c75SAmlan Barua ptrdiff_t local_1_start; 439c92418ccSAmlan Barua // PetscInt ctr; 440c92418ccSAmlan Barua // ptrdiff_t ndim1,*pdim; 441c92418ccSAmlan Barua // ndim1=(ptrdiff_t) ndim; 442c92418ccSAmlan Barua // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 44311902ff2SHong Zhang 444c92418ccSAmlan Barua // for(ctr=0;ctr<ndim;ctr++) 445f76f76beSAmlan Barua // {k 446c92418ccSAmlan Barua // pdim[ctr] = dim[ctr]; 447c92418ccSAmlan Barua // } 448b2b77a04SHong Zhang 449f76f76beSAmlan Barua 450f76f76beSAmlan Barua 451b2b77a04SHong Zhang switch (ndim){ 452b2b77a04SHong Zhang case 1: 4536882372aSHong Zhang /* Get local size */ 454c92418ccSAmlan Barua /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 455c92418ccSAmlan Barua // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 4566882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 4576882372aSHong Zhang if (fin) { 4586882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4596882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 4606882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4616882372aSHong Zhang } 4626882372aSHong Zhang if (fout) { 4636882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 4646882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4656882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4666882372aSHong Zhang } 467b2b77a04SHong Zhang break; 468b2b77a04SHong Zhang case 2: 4693c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4703c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4713c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4723c3a9c75SAmlan Barua if (fin) { 4733c3a9c75SAmlan Barua data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 47454dd5118SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 4753c3a9c75SAmlan Barua ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 47609dd8a53SAmlan Barua //printf("The code comes here with vector size %d\n",vsize); 4773c3a9c75SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4783c3a9c75SAmlan Barua } 4793c3a9c75SAmlan Barua if (fout) { 48009dd8a53SAmlan Barua data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 48109dd8a53SAmlan Barua ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 4823c3a9c75SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4833c3a9c75SAmlan Barua } 484f76f76beSAmlan Barua printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 4853c3a9c75SAmlan Barua 4863c3a9c75SAmlan Barua #else 487b2b77a04SHong Zhang /* Get local size */ 48809dd8a53SAmlan Barua printf("Hope this does not come here"); 489b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 490b2b77a04SHong Zhang if (fin) { 491b2b77a04SHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 493b2b77a04SHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 494b2b77a04SHong Zhang } 495b2b77a04SHong Zhang if (fout) { 496b2b77a04SHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 497b2b77a04SHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 498b2b77a04SHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 499b2b77a04SHong Zhang } 5003c3a9c75SAmlan Barua printf("Hope this does not come here"); 5013c3a9c75SAmlan Barua #endif 502b2b77a04SHong Zhang break; 503b2b77a04SHong Zhang case 3: 5046882372aSHong Zhang /* Get local size */ 5053c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5063c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 5073c3a9c75SAmlan Barua #else 5086882372aSHong Zhang alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 50911902ff2SHong Zhang // printf("The quantity n is %d",n); 5106882372aSHong Zhang if (fin) { 5116882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5126882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5136882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5146882372aSHong Zhang } 5156882372aSHong Zhang if (fout) { 5166882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5176882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5186882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5196882372aSHong Zhang } 5203c3a9c75SAmlan Barua #endif 521b2b77a04SHong Zhang break; 522b2b77a04SHong Zhang default: 5236882372aSHong Zhang /* Get local size */ 5243c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5253c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not done yet"); 5263c3a9c75SAmlan Barua #else 527c92418ccSAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 52811902ff2SHong Zhang // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 52911902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 53011902ff2SHong Zhang // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 53111902ff2SHong Zhang // for(i=0;i<ndim;i++) 53211902ff2SHong Zhang // { 53311902ff2SHong Zhang // pdim[i]=dim[i];printf("%d",pdim[i]); 53411902ff2SHong Zhang // } 53511902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 53611902ff2SHong Zhang // printf("The quantity n is %d",n); 5376882372aSHong Zhang if (fin) { 5386882372aSHong Zhang data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5396882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5406882372aSHong Zhang (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5416882372aSHong Zhang } 5426882372aSHong Zhang if (fout) { 5436882372aSHong Zhang data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5446882372aSHong Zhang ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5456882372aSHong Zhang (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5466882372aSHong Zhang } 5473c3a9c75SAmlan Barua #endif 548b2b77a04SHong Zhang break; 549b2b77a04SHong Zhang } 550b2b77a04SHong Zhang } 55154dd5118SAmlan Barua // if (fin){ 55254dd5118SAmlan Barua // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 55354dd5118SAmlan Barua // } 55454dd5118SAmlan Barua // if (fout){ 55554dd5118SAmlan Barua // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 55654dd5118SAmlan Barua // } 557b2b77a04SHong Zhang PetscFunctionReturn(0); 558b2b77a04SHong Zhang } 559b2b77a04SHong Zhang 5603c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this? 561b2b77a04SHong Zhang #undef __FUNCT__ 5623c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT" 5633c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 5643c3a9c75SAmlan Barua { 5653c3a9c75SAmlan Barua PetscErrorCode ierr; 5663c3a9c75SAmlan Barua PetscFunctionBegin; 5673c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 5683c3a9c75SAmlan Barua PetscFunctionReturn(0); 5693c3a9c75SAmlan Barua } 5703c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this? 571b2b77a04SHong Zhang /* 5723c3a9c75SAmlan Barua InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 5733c3a9c75SAmlan Barua Input A, x, y 5743c3a9c75SAmlan Barua A - FFTW matrix 57554dd5118SAmlan Barua x - user data 576b2b77a04SHong Zhang Options Database Keys: 577b2b77a04SHong Zhang + -mat_fftw_plannerflags - set FFTW planner flags 578b2b77a04SHong Zhang 579b2b77a04SHong Zhang Level: intermediate 580b2b77a04SHong Zhang 581b2b77a04SHong Zhang */ 5823c3a9c75SAmlan Barua 5833c3a9c75SAmlan Barua EXTERN_C_BEGIN 5843c3a9c75SAmlan Barua #undef __FUNCT__ 5853c3a9c75SAmlan Barua #define __FUNCT__ "InputTransformFFT_FTTW" 5863c3a9c75SAmlan Barua PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 5873c3a9c75SAmlan Barua { 5883c3a9c75SAmlan Barua PetscErrorCode ierr; 5893c3a9c75SAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 5903c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 5913c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 5923c3a9c75SAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 5933c3a9c75SAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 594f76f76beSAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 595f76f76beSAmlan Barua PetscInt i,j,rank,size; 5963c3a9c75SAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 5973c3a9c75SAmlan Barua ptrdiff_t local_n1,local_1_start; 5983c3a9c75SAmlan Barua VecScatter vecscat; 5993c3a9c75SAmlan Barua IS list1,list2; 6003c3a9c75SAmlan Barua 601f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 602f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6033c3a9c75SAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 604f76f76beSAmlan Barua printf("Local ownership starts at %d\n",low); 6053c3a9c75SAmlan Barua 6063c3a9c75SAmlan Barua switch (ndim){ 6073c3a9c75SAmlan Barua case 1: 6083c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 6093c3a9c75SAmlan Barua break; 6103c3a9c75SAmlan Barua case 2: 6113c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6123c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 6133c3a9c75SAmlan Barua 614*5b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 615*5b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 616*5b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 6173c3a9c75SAmlan Barua 6183c3a9c75SAmlan Barua if (dim[1]%2==0) 6193c3a9c75SAmlan Barua NM = dim[1]+2; 6203c3a9c75SAmlan Barua else 6213c3a9c75SAmlan Barua NM = dim[1]+1; 6223c3a9c75SAmlan Barua 6233c3a9c75SAmlan Barua for (i=0;i<local_n0;i++){ 6243c3a9c75SAmlan Barua for (j=0;j<dim[1];j++){ 6253c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 6263c3a9c75SAmlan Barua tempindx1 = i*NM + j; 627*5b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 6283c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 629*5b3e41ffSAmlan Barua // printf("Val tempindx1 = %d\n",tempindx1); 630*5b3e41ffSAmlan Barua // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 631*5b3e41ffSAmlan Barua // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 632*5b3e41ffSAmlan Barua // printf("-------------------------\n",indx2[tempindx],rank); 6333c3a9c75SAmlan Barua } 6343c3a9c75SAmlan Barua } 6353c3a9c75SAmlan Barua 6363c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 6373c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 6383c3a9c75SAmlan Barua 639f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 640f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 641f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 642f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 6433c3a9c75SAmlan Barua break; 6443c3a9c75SAmlan Barua 6453c3a9c75SAmlan Barua case 3: 6463c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 6473c3a9c75SAmlan Barua break; 6483c3a9c75SAmlan Barua 6493c3a9c75SAmlan Barua default: 6503c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 6513c3a9c75SAmlan Barua break; 6523c3a9c75SAmlan Barua } 6533c3a9c75SAmlan Barua 6543c3a9c75SAmlan Barua return 0; 6553c3a9c75SAmlan Barua } 6563c3a9c75SAmlan Barua EXTERN_C_END 6573c3a9c75SAmlan Barua 658*5b3e41ffSAmlan Barua 6593c3a9c75SAmlan Barua //EXTERN_C_BEGIN - Do we need this? 6603c3a9c75SAmlan Barua #undef __FUNCT__ 6613c3a9c75SAmlan Barua #define __FUNCT__ "OutputTransformFFT" 6623c3a9c75SAmlan Barua PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 6633c3a9c75SAmlan Barua { 6643c3a9c75SAmlan Barua PetscErrorCode ierr; 6653c3a9c75SAmlan Barua PetscFunctionBegin; 6663c3a9c75SAmlan Barua ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6673c3a9c75SAmlan Barua PetscFunctionReturn(0); 6683c3a9c75SAmlan Barua } 6693c3a9c75SAmlan Barua //EXTERN_C_END - Do we need this? 670*5b3e41ffSAmlan Barua /* 671*5b3e41ffSAmlan Barua OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 672*5b3e41ffSAmlan Barua Input A, x, y 673*5b3e41ffSAmlan Barua A - FFTW matrix 674*5b3e41ffSAmlan Barua x - FFTW vector 675*5b3e41ffSAmlan Barua y - PETSc vector that user can read 676*5b3e41ffSAmlan Barua Options Database Keys: 677*5b3e41ffSAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 678*5b3e41ffSAmlan Barua 679*5b3e41ffSAmlan Barua Level: intermediate 680*5b3e41ffSAmlan Barua 6813c3a9c75SAmlan Barua */ 6823c3a9c75SAmlan Barua 6833c3a9c75SAmlan Barua EXTERN_C_BEGIN 6843c3a9c75SAmlan Barua #undef __FUNCT__ 685*5b3e41ffSAmlan Barua #define __FUNCT__ "OutputTransformFFT_FTTW" 686*5b3e41ffSAmlan Barua PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 687*5b3e41ffSAmlan Barua { 688*5b3e41ffSAmlan Barua PetscErrorCode ierr; 689*5b3e41ffSAmlan Barua MPI_Comm comm=((PetscObject)A)->comm; 690*5b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 691*5b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 692*5b3e41ffSAmlan Barua PetscInt N=fft->N, N1, n1 ,NM; 693*5b3e41ffSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 694*5b3e41ffSAmlan Barua PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 695*5b3e41ffSAmlan Barua PetscInt i,j,rank,size; 696*5b3e41ffSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 697*5b3e41ffSAmlan Barua ptrdiff_t local_n1,local_1_start; 698*5b3e41ffSAmlan Barua VecScatter vecscat; 699*5b3e41ffSAmlan Barua IS list1,list2; 700*5b3e41ffSAmlan Barua 701*5b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 702*5b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 703*5b3e41ffSAmlan Barua ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 704*5b3e41ffSAmlan Barua printf("Local ownership starts at %d\n",low); 705*5b3e41ffSAmlan Barua 706*5b3e41ffSAmlan Barua switch (ndim){ 707*5b3e41ffSAmlan Barua case 1: 708*5b3e41ffSAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 709*5b3e41ffSAmlan Barua break; 710*5b3e41ffSAmlan Barua case 2: 711*5b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 712*5b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 713*5b3e41ffSAmlan Barua 714*5b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 715*5b3e41ffSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 716*5b3e41ffSAmlan Barua printf("Val local_0_start = %ld\n",local_0_start); 717*5b3e41ffSAmlan Barua 718*5b3e41ffSAmlan Barua if (dim[1]%2==0) 719*5b3e41ffSAmlan Barua NM = dim[1]+2; 720*5b3e41ffSAmlan Barua else 721*5b3e41ffSAmlan Barua NM = dim[1]+1; 722*5b3e41ffSAmlan Barua 723*5b3e41ffSAmlan Barua 724*5b3e41ffSAmlan Barua 725*5b3e41ffSAmlan Barua for (i=0;i<local_n0;i++){ 726*5b3e41ffSAmlan Barua for (j=0;j<dim[1];j++){ 727*5b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 728*5b3e41ffSAmlan Barua tempindx1 = i*NM + j; 729*5b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 730*5b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 731*5b3e41ffSAmlan Barua printf("Val tempindx1 = %d\n",tempindx1); 732*5b3e41ffSAmlan Barua printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 733*5b3e41ffSAmlan Barua printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 734*5b3e41ffSAmlan Barua printf("-------------------------\n",indx2[tempindx],rank); 735*5b3e41ffSAmlan Barua } 736*5b3e41ffSAmlan Barua } 737*5b3e41ffSAmlan Barua 738*5b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 739*5b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 740*5b3e41ffSAmlan Barua 741*5b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 742*5b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743*5b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744*5b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 745*5b3e41ffSAmlan Barua break; 746*5b3e41ffSAmlan Barua 747*5b3e41ffSAmlan Barua case 3: 748*5b3e41ffSAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 749*5b3e41ffSAmlan Barua break; 750*5b3e41ffSAmlan Barua 751*5b3e41ffSAmlan Barua default: 752*5b3e41ffSAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 753*5b3e41ffSAmlan Barua break; 754*5b3e41ffSAmlan Barua } 755*5b3e41ffSAmlan Barua 756*5b3e41ffSAmlan Barua return 0; 757*5b3e41ffSAmlan Barua } 758*5b3e41ffSAmlan Barua EXTERN_C_END 759*5b3e41ffSAmlan Barua 760*5b3e41ffSAmlan Barua 761*5b3e41ffSAmlan Barua 762*5b3e41ffSAmlan Barua 763*5b3e41ffSAmlan Barua EXTERN_C_BEGIN 764*5b3e41ffSAmlan Barua #undef __FUNCT__ 7653c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 7663c3a9c75SAmlan Barua /* 7673c3a9c75SAmlan Barua MatCreate_FFTW - Creates a matrix object that provides FFT 7683c3a9c75SAmlan Barua via the external package FFTW 7693c3a9c75SAmlan Barua Options Database Keys: 7703c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 7713c3a9c75SAmlan Barua 7723c3a9c75SAmlan Barua Level: intermediate 7733c3a9c75SAmlan Barua 7743c3a9c75SAmlan Barua */ 7753c3a9c75SAmlan Barua 776b2b77a04SHong Zhang PetscErrorCode MatCreate_FFTW(Mat A) 777b2b77a04SHong Zhang { 778b2b77a04SHong Zhang PetscErrorCode ierr; 779b2b77a04SHong Zhang MPI_Comm comm=((PetscObject)A)->comm; 780b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 781b2b77a04SHong Zhang Mat_FFTW *fftw; 782b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 783b2b77a04SHong Zhang const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 784b2b77a04SHong Zhang PetscBool flg; 7856882372aSHong Zhang PetscInt p_flag,partial_dim=1,ctr; 78611902ff2SHong Zhang PetscMPIInt size,rank; 78711902ff2SHong Zhang ptrdiff_t *pdim; 788*5b3e41ffSAmlan Barua ptrdiff_t local_n1,local_1_start; 789b2b77a04SHong Zhang 790b2b77a04SHong Zhang PetscFunctionBegin; 7913c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 7923c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 7933c3a9c75SAmlan Barua //#endif 794b2b77a04SHong Zhang 795b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 79611902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 79754dd5118SAmlan Barua ierr = MPI_Barrier(PETSC_COMM_WORLD); 798c92418ccSAmlan Barua 79911902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 80011902ff2SHong Zhang pdim[0] = dim[0]; 80111902ff2SHong Zhang for(ctr=1;ctr<ndim;ctr++) 80211902ff2SHong Zhang { 8036882372aSHong Zhang partial_dim *= dim[ctr]; 80411902ff2SHong Zhang pdim[ctr] = dim[ctr]; 8056882372aSHong Zhang } 8063c3a9c75SAmlan Barua //#if !defined(PETSC_USE_COMPLEX) 8073c3a9c75SAmlan Barua // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 8083c3a9c75SAmlan Barua //#endif 8093c3a9c75SAmlan Barua 81011902ff2SHong Zhang // printf("partial dimension is %d",partial_dim); 811b2b77a04SHong Zhang if (size == 1) { 812b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 813b2b77a04SHong Zhang n = N; 814b2b77a04SHong Zhang } else { 8156882372aSHong Zhang ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 816b2b77a04SHong Zhang switch (ndim){ 817b2b77a04SHong Zhang case 1: 8183c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8193c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 8203c3a9c75SAmlan Barua #endif 8216882372aSHong Zhang alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 8226882372aSHong Zhang n = (PetscInt)local_n0; 8236882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 8243c3a9c75SAmlan Barua // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 825b2b77a04SHong Zhang break; 826b2b77a04SHong Zhang case 2: 827*5b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 828b2b77a04SHong Zhang alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 829b2b77a04SHong Zhang /* 830b2b77a04SHong Zhang PetscMPIInt rank; 831b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 832b2b77a04SHong Zhang PetscSynchronizedFlush(comm); 833b2b77a04SHong Zhang */ 834b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 835b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 836*5b3e41ffSAmlan Barua #else 837*5b3e41ffSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 838*5b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 839*5b3e41ffSAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 840*5b3e41ffSAmlan Barua #endif 841b2b77a04SHong Zhang break; 842b2b77a04SHong Zhang case 3: 84311902ff2SHong Zhang // printf("The value of alloc local is %d",alloc_local); 8446882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 8456882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 846b2b77a04SHong Zhang break; 847b2b77a04SHong Zhang default: 84811902ff2SHong Zhang alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 849c92418ccSAmlan Barua // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 85011902ff2SHong Zhang // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 8516882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 85211902ff2SHong Zhang // printf("New partial dimension is %d %d %d",n,N,ndim); 8536882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 854b2b77a04SHong Zhang break; 855b2b77a04SHong Zhang } 856b2b77a04SHong Zhang } 857b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 858b2b77a04SHong Zhang 859b2b77a04SHong Zhang ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 860b2b77a04SHong Zhang fft->data = (void*)fftw; 861b2b77a04SHong Zhang 862b2b77a04SHong Zhang fft->n = n; 863c92418ccSAmlan Barua fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 864c92418ccSAmlan Barua ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 865b1301b2eSHong Zhang for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 866c92418ccSAmlan Barua 867b2b77a04SHong Zhang fftw->p_forward = 0; 868b2b77a04SHong Zhang fftw->p_backward = 0; 869b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 870b2b77a04SHong Zhang 871b2b77a04SHong Zhang if (size == 1){ 872b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 873b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 874b2b77a04SHong Zhang } else { 875b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 876b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 877b2b77a04SHong Zhang } 878b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 879b2b77a04SHong Zhang A->ops->getvecs = MatGetVecs_FFTW; 880b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 8813c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8823c3a9c75SAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 883*5b3e41ffSAmlan Barua PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 8843c3a9c75SAmlan Barua #endif 885b2b77a04SHong Zhang 886b2b77a04SHong Zhang /* get runtime options */ 887b2b77a04SHong Zhang ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 888b2b77a04SHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 889b2b77a04SHong Zhang if (flg) {fftw->p_flag = (unsigned)p_flag;} 890b2b77a04SHong Zhang PetscOptionsEnd(); 891b2b77a04SHong Zhang PetscFunctionReturn(0); 892b2b77a04SHong Zhang } 893b2b77a04SHong Zhang EXTERN_C_END 8943c3a9c75SAmlan Barua 8953c3a9c75SAmlan Barua 8963c3a9c75SAmlan Barua 8973c3a9c75SAmlan Barua 898