1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32b2b77a04SHong Zhang 3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3497504071SAmlan Barua Input parameter: 353564f093SHong Zhang A - the matrix 3697504071SAmlan Barua x - the vector on which FDFT will be performed 3797504071SAmlan Barua 3897504071SAmlan Barua Output parameter: 3997504071SAmlan Barua y - vector that stores result of FDFT 4097504071SAmlan Barua */ 41b2b77a04SHong Zhang #undef __FUNCT__ 42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44b2b77a04SHong Zhang { 45b2b77a04SHong Zhang PetscErrorCode ierr; 46b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 47b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 491acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 50a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 51a1b6d50cSHong Zhang fftw_iodim64 *iodims; 52a1b6d50cSHong Zhang #else 538c1d5ab3SHong Zhang fftw_iodim *iodims; 54a1b6d50cSHong Zhang #endif 551acd80e4SHong Zhang PetscInt i; 561acd80e4SHong Zhang #endif 571acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 58b2b77a04SHong Zhang 59b2b77a04SHong Zhang PetscFunctionBegin; 60b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 61b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 63b2b77a04SHong Zhang switch (ndim) { 64b2b77a04SHong Zhang case 1: 6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 66b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6758a912c5SAmlan Barua #else 6858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6958a912c5SAmlan Barua #endif 70b2b77a04SHong Zhang break; 71b2b77a04SHong Zhang case 2: 7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 73b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7458a912c5SAmlan Barua #else 7558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7658a912c5SAmlan Barua #endif 77b2b77a04SHong Zhang break; 78b2b77a04SHong Zhang case 3: 7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 80b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8158a912c5SAmlan Barua #else 8258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8358a912c5SAmlan Barua #endif 84b2b77a04SHong Zhang break; 85b2b77a04SHong Zhang default: 8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87a1b6d50cSHong Zhang iodims = fftw->iodims; 88a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 898c1d5ab3SHong Zhang if (ndim) { 90a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 918c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 928c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 93a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 948c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 958c1d5ab3SHong Zhang } 968c1d5ab3SHong Zhang } 97a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98a1b6d50cSHong Zhang #else 99a1b6d50cSHong Zhang if (ndim) { 100a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 101a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 102a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 103a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 104a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105a1b6d50cSHong Zhang } 106a1b6d50cSHong Zhang } 107a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108a1b6d50cSHong Zhang #endif 109a1b6d50cSHong Zhang 11058a912c5SAmlan Barua #else 111a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11258a912c5SAmlan Barua #endif 113b2b77a04SHong Zhang break; 114b2b77a04SHong Zhang } 115b2b77a04SHong Zhang fftw->finarray = x_array; 116b2b77a04SHong Zhang fftw->foutarray = y_array; 117b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 120b2b77a04SHong Zhang } else { /* use existing plan */ 121b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 123b2b77a04SHong Zhang } else { 124b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 125b2b77a04SHong Zhang } 126b2b77a04SHong Zhang } 127b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 128b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 129b2b77a04SHong Zhang PetscFunctionReturn(0); 130b2b77a04SHong Zhang } 131b2b77a04SHong Zhang 13297504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13397504071SAmlan Barua Input parameter: 1343564f093SHong Zhang A - the matrix 13597504071SAmlan Barua x - the vector on which BDFT will be performed 13697504071SAmlan Barua 13797504071SAmlan Barua Output parameter: 13897504071SAmlan Barua y - vector that stores result of BDFT 13997504071SAmlan Barua */ 14097504071SAmlan Barua 141b2b77a04SHong Zhang #undef __FUNCT__ 142b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 143b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 144b2b77a04SHong Zhang { 145b2b77a04SHong Zhang PetscErrorCode ierr; 146b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 147b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 148b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 149b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 151a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 152a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 153a1b6d50cSHong Zhang #else 154a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 155a1b6d50cSHong Zhang #endif 1561acd80e4SHong Zhang #endif 157b2b77a04SHong Zhang 158b2b77a04SHong Zhang PetscFunctionBegin; 159b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 160b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 161b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 162b2b77a04SHong Zhang switch (ndim) { 163b2b77a04SHong Zhang case 1: 16458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 165b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16658a912c5SAmlan Barua #else 167e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 16858a912c5SAmlan Barua #endif 169b2b77a04SHong Zhang break; 170b2b77a04SHong Zhang case 2: 17158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 172b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17358a912c5SAmlan Barua #else 174e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17558a912c5SAmlan Barua #endif 176b2b77a04SHong Zhang break; 177b2b77a04SHong Zhang case 3: 17858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 179b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18058a912c5SAmlan Barua #else 181e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18258a912c5SAmlan Barua #endif 183b2b77a04SHong Zhang break; 184b2b77a04SHong Zhang default: 18558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 186a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 187a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 188a1b6d50cSHong Zhang #else 1898c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190a1b6d50cSHong Zhang #endif 19158a912c5SAmlan Barua #else 192a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19358a912c5SAmlan Barua #endif 194b2b77a04SHong Zhang break; 195b2b77a04SHong Zhang } 196b2b77a04SHong Zhang fftw->binarray = x_array; 197b2b77a04SHong Zhang fftw->boutarray = y_array; 198b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 199b2b77a04SHong Zhang } else { /* use existing plan */ 200b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 201b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202b2b77a04SHong Zhang } else { 203b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang } 206b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208b2b77a04SHong Zhang PetscFunctionReturn(0); 209b2b77a04SHong Zhang } 210b2b77a04SHong Zhang 21197504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 21297504071SAmlan Barua Input parameter: 2133564f093SHong Zhang A - the matrix 21497504071SAmlan Barua x - the vector on which FDFT will be performed 21597504071SAmlan Barua 21697504071SAmlan Barua Output parameter: 21797504071SAmlan Barua y - vector that stores result of FDFT 21897504071SAmlan Barua */ 219b2b77a04SHong Zhang #undef __FUNCT__ 220b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 221b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 222b2b77a04SHong Zhang { 223b2b77a04SHong Zhang PetscErrorCode ierr; 224b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 225b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 226b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 227c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 228ce94432eSBarry Smith MPI_Comm comm; 229b2b77a04SHong Zhang 230b2b77a04SHong Zhang PetscFunctionBegin; 231ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 232b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 233b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 234b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 235b2b77a04SHong Zhang switch (ndim) { 236b2b77a04SHong Zhang case 1: 2373c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 238b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 239ae0a50aaSHong Zhang #else 2404f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2413c3a9c75SAmlan Barua #endif 242b2b77a04SHong Zhang break; 243b2b77a04SHong Zhang case 2: 24497504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 245b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2463c3a9c75SAmlan Barua #else 2473c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2483c3a9c75SAmlan Barua #endif 249b2b77a04SHong Zhang break; 250b2b77a04SHong Zhang case 3: 2513c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 252b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2533c3a9c75SAmlan Barua #else 25451d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2553c3a9c75SAmlan Barua #endif 256b2b77a04SHong Zhang break; 257b2b77a04SHong Zhang default: 2583c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 259c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2603c3a9c75SAmlan Barua #else 2613c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2623c3a9c75SAmlan Barua #endif 263b2b77a04SHong Zhang break; 264b2b77a04SHong Zhang } 265b2b77a04SHong Zhang fftw->finarray = x_array; 266b2b77a04SHong Zhang fftw->foutarray = y_array; 267b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 268b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 269b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 270b2b77a04SHong Zhang } else { /* use existing plan */ 271b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 272b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 273b2b77a04SHong Zhang } else { 274b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 275b2b77a04SHong Zhang } 276b2b77a04SHong Zhang } 277b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 278b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 279b2b77a04SHong Zhang PetscFunctionReturn(0); 280b2b77a04SHong Zhang } 281b2b77a04SHong Zhang 28297504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 28397504071SAmlan Barua Input parameter: 2843564f093SHong Zhang A - the matrix 28597504071SAmlan Barua x - the vector on which BDFT will be performed 28697504071SAmlan Barua 28797504071SAmlan Barua Output parameter: 28897504071SAmlan Barua y - vector that stores result of BDFT 28997504071SAmlan Barua */ 290b2b77a04SHong Zhang #undef __FUNCT__ 291b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 292b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 293b2b77a04SHong Zhang { 294b2b77a04SHong Zhang PetscErrorCode ierr; 295b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 296b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 297b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 298c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 299ce94432eSBarry Smith MPI_Comm comm; 300b2b77a04SHong Zhang 301b2b77a04SHong Zhang PetscFunctionBegin; 302ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 303b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 304b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 305b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 306b2b77a04SHong Zhang switch (ndim) { 307b2b77a04SHong Zhang case 1: 3083c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 309b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 310ae0a50aaSHong Zhang #else 3114f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3123c3a9c75SAmlan Barua #endif 313b2b77a04SHong Zhang break; 314b2b77a04SHong Zhang case 2: 31597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 316b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3173c3a9c75SAmlan Barua #else 3183c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3193c3a9c75SAmlan Barua #endif 320b2b77a04SHong Zhang break; 321b2b77a04SHong Zhang case 3: 3223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 323b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3243c3a9c75SAmlan Barua #else 3253c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3263c3a9c75SAmlan Barua #endif 327b2b77a04SHong Zhang break; 328b2b77a04SHong Zhang default: 3293c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 330c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3313c3a9c75SAmlan Barua #else 3323c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3333c3a9c75SAmlan Barua #endif 334b2b77a04SHong Zhang break; 335b2b77a04SHong Zhang } 336b2b77a04SHong Zhang fftw->binarray = x_array; 337b2b77a04SHong Zhang fftw->boutarray = y_array; 338b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 339b2b77a04SHong Zhang } else { /* use existing plan */ 340b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 341b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 342b2b77a04SHong Zhang } else { 343b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344b2b77a04SHong Zhang } 345b2b77a04SHong Zhang } 346b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 347b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 348b2b77a04SHong Zhang PetscFunctionReturn(0); 349b2b77a04SHong Zhang } 350b2b77a04SHong Zhang 351b2b77a04SHong Zhang #undef __FUNCT__ 352b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 353b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 354b2b77a04SHong Zhang { 355b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 356b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 357b2b77a04SHong Zhang PetscErrorCode ierr; 358b2b77a04SHong Zhang 359b2b77a04SHong Zhang PetscFunctionBegin; 360b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 361b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3628c1d5ab3SHong Zhang if (fftw->iodims) { 3638c1d5ab3SHong Zhang free(fftw->iodims); 3648c1d5ab3SHong Zhang } 365bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 366bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3676ccf0b3eSHong Zhang fftw_mpi_cleanup(); 368b2b77a04SHong Zhang PetscFunctionReturn(0); 369b2b77a04SHong Zhang } 370b2b77a04SHong Zhang 371c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 372b2b77a04SHong Zhang #undef __FUNCT__ 373b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 374b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 375b2b77a04SHong Zhang { 376b2b77a04SHong Zhang PetscErrorCode ierr; 377b2b77a04SHong Zhang PetscScalar *array; 378b2b77a04SHong Zhang 379b2b77a04SHong Zhang PetscFunctionBegin; 380b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 381b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 382b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 383b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 384b2b77a04SHong Zhang PetscFunctionReturn(0); 385b2b77a04SHong Zhang } 386b2b77a04SHong Zhang 3874f7415efSAmlan Barua #undef __FUNCT__ 3884be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3894be45526SHong Zhang /*@ 390b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3914f7415efSAmlan Barua parallel layout determined by FFTW 3924f7415efSAmlan Barua 3934f7415efSAmlan Barua Collective on Mat 3944f7415efSAmlan Barua 3954f7415efSAmlan Barua Input Parameter: 3963564f093SHong Zhang . A - the matrix 3974f7415efSAmlan Barua 3984f7415efSAmlan Barua Output Parameter: 399cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 400cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 401cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4024f7415efSAmlan Barua 4034f7415efSAmlan Barua Level: advanced 4043564f093SHong Zhang 40597504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 40697504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 40797504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 40897504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 40997504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 41097504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 411e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 412e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 413e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 414e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 415e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 416e0ec536eSAmlan Barua each processor and returns that. 4174f7415efSAmlan Barua 4184f7415efSAmlan Barua .seealso: MatCreateFFTW() 4194be45526SHong Zhang @*/ 4204be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4214be45526SHong Zhang { 4224be45526SHong Zhang PetscErrorCode ierr; 423b4c3921fSHong Zhang 4244be45526SHong Zhang PetscFunctionBegin; 4254be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4264be45526SHong Zhang PetscFunctionReturn(0); 4274be45526SHong Zhang } 4284be45526SHong Zhang 4294be45526SHong Zhang #undef __FUNCT__ 4304be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 4314be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4324f7415efSAmlan Barua { 4334f7415efSAmlan Barua PetscErrorCode ierr; 4344f7415efSAmlan Barua PetscMPIInt size,rank; 435ce94432eSBarry Smith MPI_Comm comm; 4364f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4374f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4389496c9aeSAmlan Barua PetscInt N = fft->N; 4394f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4404f7415efSAmlan Barua 4414f7415efSAmlan Barua PetscFunctionBegin; 4424f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4434f7415efSAmlan Barua PetscValidType(A,1); 444ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4454f7415efSAmlan Barua 4464f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4474f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4484f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4494f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4504f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4514f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4524f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4534f7415efSAmlan Barua #else 4548ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4558ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4568ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4574f7415efSAmlan Barua #endif 45897504071SAmlan Barua } else { /* parallel cases */ 4594f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4609496c9aeSAmlan Barua ptrdiff_t local_n1; 4619496c9aeSAmlan Barua fftw_complex *data_fout; 4629496c9aeSAmlan Barua ptrdiff_t local_1_start; 4639496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4649496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4659496c9aeSAmlan Barua #else 4664f7415efSAmlan Barua double *data_finr,*data_boutr; 4676ccf0b3eSHong Zhang PetscInt n1,N1; 4689496c9aeSAmlan Barua ptrdiff_t temp; 4699496c9aeSAmlan Barua #endif 4709496c9aeSAmlan Barua 4714f7415efSAmlan Barua switch (ndim) { 4724f7415efSAmlan Barua case 1: 47357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4744f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 47557625b84SAmlan Barua #else 47657625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 47757625b84SAmlan Barua if (fin) { 47857625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 48026fbe8dcSKarl Rupp 48157625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 48257625b84SAmlan Barua } 48357625b84SAmlan Barua if (fout) { 48457625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 485778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48626fbe8dcSKarl Rupp 48757625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48857625b84SAmlan Barua } 48957625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 49057625b84SAmlan Barua if (bout) { 49157625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 49326fbe8dcSKarl Rupp 49457625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 49557625b84SAmlan Barua } 49657625b84SAmlan Barua break; 49757625b84SAmlan Barua #endif 4984f7415efSAmlan Barua case 2: 49997504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5004f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5014f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5024f7415efSAmlan Barua if (fin) { 5034f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 504778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50526fbe8dcSKarl Rupp 5064f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5074f7415efSAmlan Barua } 5084f7415efSAmlan Barua if (bout) { 5094f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 510778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 51126fbe8dcSKarl Rupp 5124f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5134f7415efSAmlan Barua } 51457625b84SAmlan Barua if (fout) { 51557625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51726fbe8dcSKarl Rupp 51857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51957625b84SAmlan Barua } 5204f7415efSAmlan Barua #else 5214f7415efSAmlan Barua /* Get local size */ 5224f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5234f7415efSAmlan Barua if (fin) { 5244f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52626fbe8dcSKarl Rupp 5274f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5284f7415efSAmlan Barua } 5294f7415efSAmlan Barua if (fout) { 5304f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 53226fbe8dcSKarl Rupp 5334f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5344f7415efSAmlan Barua } 5354f7415efSAmlan Barua if (bout) { 5364f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53826fbe8dcSKarl Rupp 5394f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5404f7415efSAmlan Barua } 5414f7415efSAmlan Barua #endif 5424f7415efSAmlan Barua break; 5434f7415efSAmlan Barua case 3: 5444f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5454f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5464f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5474f7415efSAmlan Barua if (fin) { 5484f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 549778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 55026fbe8dcSKarl Rupp 5514f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5524f7415efSAmlan Barua } 5534f7415efSAmlan Barua if (bout) { 5544f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 555778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 55626fbe8dcSKarl Rupp 5574f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5584f7415efSAmlan Barua } 5593564f093SHong Zhang 56057625b84SAmlan Barua if (fout) { 56157625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56326fbe8dcSKarl Rupp 56457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56557625b84SAmlan Barua } 5664f7415efSAmlan Barua #else 5670c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5680c9b18e4SAmlan Barua if (fin) { 5690c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 57126fbe8dcSKarl Rupp 5720c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5730c9b18e4SAmlan Barua } 5740c9b18e4SAmlan Barua if (fout) { 5750c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 576778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57726fbe8dcSKarl Rupp 5780c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5790c9b18e4SAmlan Barua } 5800c9b18e4SAmlan Barua if (bout) { 5810c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 58326fbe8dcSKarl Rupp 5840c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5850c9b18e4SAmlan Barua } 5864f7415efSAmlan Barua #endif 5874f7415efSAmlan Barua break; 5884f7415efSAmlan Barua default: 5894f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5904f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 59126fbe8dcSKarl Rupp 5924f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 59326fbe8dcSKarl Rupp 5944f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5954f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 59626fbe8dcSKarl Rupp 5974f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5984f7415efSAmlan Barua 5994f7415efSAmlan Barua if (fin) { 6004f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 601778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 60226fbe8dcSKarl Rupp 6034f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6044f7415efSAmlan Barua } 6054f7415efSAmlan Barua if (bout) { 6064f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 607778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 60826fbe8dcSKarl Rupp 6099496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6104f7415efSAmlan Barua } 6113564f093SHong Zhang 61257625b84SAmlan Barua if (fout) { 61357625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 614778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 61526fbe8dcSKarl Rupp 61657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 61757625b84SAmlan Barua } 6184f7415efSAmlan Barua #else 6190c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6200c9b18e4SAmlan Barua if (fin) { 6210c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 622778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 62326fbe8dcSKarl Rupp 6240c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6250c9b18e4SAmlan Barua } 6260c9b18e4SAmlan Barua if (fout) { 6270c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 628778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 62926fbe8dcSKarl Rupp 6300c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6310c9b18e4SAmlan Barua } 6320c9b18e4SAmlan Barua if (bout) { 6330c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 63526fbe8dcSKarl Rupp 6360c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6370c9b18e4SAmlan Barua } 6384f7415efSAmlan Barua #endif 6394f7415efSAmlan Barua break; 6404f7415efSAmlan Barua } 6414f7415efSAmlan Barua } 6424f7415efSAmlan Barua PetscFunctionReturn(0); 6434f7415efSAmlan Barua } 6444f7415efSAmlan Barua 645c92418ccSAmlan Barua #undef __FUNCT__ 64674a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6473564f093SHong Zhang /*@ 6483564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 64954efbe56SHong Zhang 6503564f093SHong Zhang Collective on Mat 6513564f093SHong Zhang 6523564f093SHong Zhang Input Parameters: 6533564f093SHong Zhang + A - FFTW matrix 6543564f093SHong Zhang - x - the PETSc vector 6553564f093SHong Zhang 6563564f093SHong Zhang Output Parameters: 6573564f093SHong Zhang . y - the FFTW vector 6583564f093SHong Zhang 659b2b77a04SHong Zhang Options Database Keys: 6603564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 661b2b77a04SHong Zhang 662b2b77a04SHong Zhang Level: intermediate 6633564f093SHong Zhang 66497504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 66597504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 66697504071SAmlan Barua zeros. This routine does that job by scattering operation. 66797504071SAmlan Barua 6683564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6693564f093SHong Zhang @*/ 6703564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6713564f093SHong Zhang { 6723564f093SHong Zhang PetscErrorCode ierr; 673b2b77a04SHong Zhang 6743564f093SHong Zhang PetscFunctionBegin; 6753564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6763564f093SHong Zhang PetscFunctionReturn(0); 6773564f093SHong Zhang } 6783c3a9c75SAmlan Barua 6793c3a9c75SAmlan Barua #undef __FUNCT__ 6801986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 68174a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6823c3a9c75SAmlan Barua { 6833c3a9c75SAmlan Barua PetscErrorCode ierr; 684ce94432eSBarry Smith MPI_Comm comm; 6853c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6863c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6879496c9aeSAmlan Barua PetscInt N =fft->N; 688b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6899496c9aeSAmlan Barua PetscInt low; 6907a21806cSHong Zhang PetscMPIInt rank,size; 6917a21806cSHong Zhang PetscInt vsize,vsize1; 6927a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6939496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6943c3a9c75SAmlan Barua VecScatter vecscat; 6953c3a9c75SAmlan Barua IS list1,list2; 6969496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6979496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6989496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 699a31b9140SHong Zhang PetscInt NM; 7009496c9aeSAmlan Barua ptrdiff_t temp; 7019496c9aeSAmlan Barua #endif 7023c3a9c75SAmlan Barua 7033564f093SHong Zhang PetscFunctionBegin; 704ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 705f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 706f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7070298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7083c3a9c75SAmlan Barua 7093564f093SHong Zhang if (size==1) { 7108ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7118ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7129496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7136971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7146971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7156971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7166971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 717b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7183564f093SHong Zhang } else { 7193c3a9c75SAmlan Barua switch (ndim) { 7203c3a9c75SAmlan Barua case 1: 72164657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7227a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 72326fbe8dcSKarl Rupp 7244f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7254f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 72664657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 72764657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72864657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72964657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 73064657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 73164657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 73264657d84SAmlan Barua #else 733e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 73464657d84SAmlan Barua #endif 7353c3a9c75SAmlan Barua break; 7363c3a9c75SAmlan Barua case 2: 737bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7387a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 73926fbe8dcSKarl Rupp 7404f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7414f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 742bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 743bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 745bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 746bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 747bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 748bd59e6c6SAmlan Barua #else 749c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 75026fbe8dcSKarl Rupp 7519496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7529496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7533c3a9c75SAmlan Barua 7543564f093SHong Zhang if (dim[1]%2==0) { 7553c3a9c75SAmlan Barua NM = dim[1]+2; 7563564f093SHong Zhang } else { 7573c3a9c75SAmlan Barua NM = dim[1]+1; 7583564f093SHong Zhang } 7593c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7603c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7613c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7623c3a9c75SAmlan Barua tempindx1 = i*NM + j; 76326fbe8dcSKarl Rupp 7645b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7653c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7663c3a9c75SAmlan Barua } 7673c3a9c75SAmlan Barua } 7683c3a9c75SAmlan Barua 7693c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7703c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7713c3a9c75SAmlan Barua 772f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 773f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 774f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 776b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 777b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 778b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 779b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 780bd59e6c6SAmlan Barua #endif 7819496c9aeSAmlan Barua break; 7823c3a9c75SAmlan Barua 7833c3a9c75SAmlan Barua case 3: 784bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7857a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 78626fbe8dcSKarl Rupp 7874f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7884f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 789bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 790bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 795bd59e6c6SAmlan Barua #else 796*f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 797*f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 7987a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 79926fbe8dcSKarl Rupp 8009496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 8019496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 80251d1eed7SAmlan Barua 803db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 804db4deed7SKarl Rupp else NM = dim[2]+1; 80551d1eed7SAmlan Barua 80651d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 80751d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 80851d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 80951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 81051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 81126fbe8dcSKarl Rupp 81251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 81351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 81451d1eed7SAmlan Barua } 81551d1eed7SAmlan Barua } 81651d1eed7SAmlan Barua } 81751d1eed7SAmlan Barua 81851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 81951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 82051d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 82151d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82251d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82351d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 824b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 825b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 826b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 827b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 828bd59e6c6SAmlan Barua #endif 8299496c9aeSAmlan Barua break; 8303c3a9c75SAmlan Barua 8313c3a9c75SAmlan Barua default: 832bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8337a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 83426fbe8dcSKarl Rupp 8354f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8364f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 837bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 838bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 840bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 841bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 842bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 843bd59e6c6SAmlan Barua #else 844*f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 845*f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 846e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 84726fbe8dcSKarl Rupp 848e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 84926fbe8dcSKarl Rupp 8507a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 85126fbe8dcSKarl Rupp 852e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 853e5d7f247SAmlan Barua 854e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 855e5d7f247SAmlan Barua 856e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 857e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 858e5d7f247SAmlan Barua 859db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 860db4deed7SKarl Rupp else NM = 1; 861e5d7f247SAmlan Barua 8626971673cSAmlan Barua j = low; 8633564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8646971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8656971673cSAmlan Barua indx2[i] = j; 86626fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8676971673cSAmlan Barua j++; 8686971673cSAmlan Barua } 8696971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8706971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8716971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8726971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8736971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8746971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 875b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 876b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 877b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 878b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 879bd59e6c6SAmlan Barua #endif 8809496c9aeSAmlan Barua break; 8813c3a9c75SAmlan Barua } 882e81bb599SAmlan Barua } 8833564f093SHong Zhang PetscFunctionReturn(0); 8843c3a9c75SAmlan Barua } 8853c3a9c75SAmlan Barua 8863c3a9c75SAmlan Barua #undef __FUNCT__ 88774a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8883564f093SHong Zhang /*@ 8893564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8903564f093SHong Zhang 8913564f093SHong Zhang Collective on Mat 8923564f093SHong Zhang 8933564f093SHong Zhang Input Parameters: 8943564f093SHong Zhang + A - FFTW matrix 8953564f093SHong Zhang - x - FFTW vector 8963564f093SHong Zhang 8973564f093SHong Zhang Output Parameters: 8983564f093SHong Zhang . y - PETSc vector 8993564f093SHong Zhang 9003564f093SHong Zhang Level: intermediate 9013564f093SHong Zhang 9023564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9033564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9043564f093SHong Zhang 9053564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9063564f093SHong Zhang @*/ 90774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9083c3a9c75SAmlan Barua { 9093c3a9c75SAmlan Barua PetscErrorCode ierr; 9105fd66863SKarl Rupp 9113c3a9c75SAmlan Barua PetscFunctionBegin; 91274a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9133c3a9c75SAmlan Barua PetscFunctionReturn(0); 9143c3a9c75SAmlan Barua } 91554efbe56SHong Zhang 9163c3a9c75SAmlan Barua #undef __FUNCT__ 91774a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 91874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9195b3e41ffSAmlan Barua { 9205b3e41ffSAmlan Barua PetscErrorCode ierr; 921ce94432eSBarry Smith MPI_Comm comm; 9225b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9235b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9249496c9aeSAmlan Barua PetscInt N = fft->N; 925b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9269496c9aeSAmlan Barua PetscInt low; 9277a21806cSHong Zhang PetscMPIInt rank,size; 9287a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9299496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9305b3e41ffSAmlan Barua VecScatter vecscat; 9315b3e41ffSAmlan Barua IS list1,list2; 9329496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9339496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9349496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 935a31b9140SHong Zhang PetscInt NM; 9369496c9aeSAmlan Barua ptrdiff_t temp; 9379496c9aeSAmlan Barua #endif 9389496c9aeSAmlan Barua 9393564f093SHong Zhang PetscFunctionBegin; 940ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9415b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9425b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9430298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9445b3e41ffSAmlan Barua 945e81bb599SAmlan Barua if (size==1) { 946b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9476971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9486971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9496971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9506971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 951b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 952e81bb599SAmlan Barua 9533564f093SHong Zhang } else { 954e81bb599SAmlan Barua switch (ndim) { 955e81bb599SAmlan Barua case 1: 95664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9577a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 95826fbe8dcSKarl Rupp 9594f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9604f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 96164657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 96264657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96364657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96464657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 96564657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 96664657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 96764657d84SAmlan Barua #else 9686a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 96964657d84SAmlan Barua #endif 9705b3e41ffSAmlan Barua break; 9715b3e41ffSAmlan Barua case 2: 972bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9737a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 97426fbe8dcSKarl Rupp 9754f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9764f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 977bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 978bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 979bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 982bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 983bd59e6c6SAmlan Barua #else 9847a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98526fbe8dcSKarl Rupp 9869496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9879496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9885b3e41ffSAmlan Barua 989db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 990db4deed7SKarl Rupp else NM = dim[1]+1; 9915b3e41ffSAmlan Barua 9925b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9935b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9945b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9955b3e41ffSAmlan Barua tempindx1 = i*NM + j; 99626fbe8dcSKarl Rupp 9975b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9985b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9995b3e41ffSAmlan Barua } 10005b3e41ffSAmlan Barua } 10015b3e41ffSAmlan Barua 10025b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10035b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10045b3e41ffSAmlan Barua 10055b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10065b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10075b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10085b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1009b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1010b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1011b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1012b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1013bd59e6c6SAmlan Barua #endif 10149496c9aeSAmlan Barua break; 10155b3e41ffSAmlan Barua case 3: 1016bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10177a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 101826fbe8dcSKarl Rupp 10194f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10204f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1021bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1022bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1026bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua #else 10287a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 102926fbe8dcSKarl Rupp 10309496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10319496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 103251d1eed7SAmlan Barua 1033db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1034db4deed7SKarl Rupp else NM = dim[2]+1; 103551d1eed7SAmlan Barua 103651d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 103751d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 103851d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 103951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 104051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 104126fbe8dcSKarl Rupp 104251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 104351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 104451d1eed7SAmlan Barua } 104551d1eed7SAmlan Barua } 104651d1eed7SAmlan Barua } 104751d1eed7SAmlan Barua 104851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 104951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 105051d1eed7SAmlan Barua 105151d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 105251d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105351d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105451d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1058b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1059bd59e6c6SAmlan Barua #endif 10609496c9aeSAmlan Barua break; 10615b3e41ffSAmlan Barua default: 1062bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10637a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 106426fbe8dcSKarl Rupp 10654f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10664f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1067bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1068bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1071bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1072bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1073bd59e6c6SAmlan Barua #else 1074ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 107526fbe8dcSKarl Rupp 1076ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 107726fbe8dcSKarl Rupp 1078c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 107926fbe8dcSKarl Rupp 1080ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1081ba6e06d0SAmlan Barua 1082ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1083ba6e06d0SAmlan Barua 1084ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1085ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1086ba6e06d0SAmlan Barua 1087db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1088db4deed7SKarl Rupp else NM = 1; 1089ba6e06d0SAmlan Barua 1090ba6e06d0SAmlan Barua j = low; 10913564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1092ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1093ba6e06d0SAmlan Barua indx2[i] = j; 10943564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1095ba6e06d0SAmlan Barua j++; 1096ba6e06d0SAmlan Barua } 1097ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1098ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1099ba6e06d0SAmlan Barua 1100ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1101ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1102ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1103ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1104b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1105b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1106b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1107b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1108bd59e6c6SAmlan Barua #endif 11099496c9aeSAmlan Barua break; 11105b3e41ffSAmlan Barua } 1111e81bb599SAmlan Barua } 11123564f093SHong Zhang PetscFunctionReturn(0); 11135b3e41ffSAmlan Barua } 11145b3e41ffSAmlan Barua 11155b3e41ffSAmlan Barua #undef __FUNCT__ 11163c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11173c3a9c75SAmlan Barua /* 11183564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11193564f093SHong Zhang 11203c3a9c75SAmlan Barua Options Database Keys: 11213c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11223c3a9c75SAmlan Barua 11233c3a9c75SAmlan Barua Level: intermediate 11243c3a9c75SAmlan Barua 11253c3a9c75SAmlan Barua */ 11268cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1127b2b77a04SHong Zhang { 1128b2b77a04SHong Zhang PetscErrorCode ierr; 1129ce94432eSBarry Smith MPI_Comm comm; 1130b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1131b2b77a04SHong Zhang Mat_FFTW *fftw; 1132b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11335274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11345274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1135b2b77a04SHong Zhang PetscBool flg; 11364f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 113711902ff2SHong Zhang PetscMPIInt size,rank; 11389496c9aeSAmlan Barua ptrdiff_t *pdim; 11399496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11409496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11419496c9aeSAmlan Barua ptrdiff_t temp; 11428ca4c034SAmlan Barua PetscInt N1,tot_dim; 11434f48bc67SAmlan Barua #else 11444f48bc67SAmlan Barua PetscInt n1; 11459496c9aeSAmlan Barua #endif 11469496c9aeSAmlan Barua 1147b2b77a04SHong Zhang PetscFunctionBegin; 1148ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1149b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 115011902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1151c92418ccSAmlan Barua 11521878d478SAmlan Barua fftw_mpi_init(); 115311902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 115411902ff2SHong Zhang pdim[0] = dim[0]; 11558ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11568ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11578ca4c034SAmlan Barua #endif 11583564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11596882372aSHong Zhang partial_dim *= dim[ctr]; 116011902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11618ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1162db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1163db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11648ca4c034SAmlan Barua #endif 11656882372aSHong Zhang } 11663c3a9c75SAmlan Barua 1167b2b77a04SHong Zhang if (size == 1) { 1168e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1169b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1170b2b77a04SHong Zhang n = N; 1171e81bb599SAmlan Barua #else 1172e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1173e81bb599SAmlan Barua n = tot_dim; 1174e81bb599SAmlan Barua #endif 1175e81bb599SAmlan Barua 1176b2b77a04SHong Zhang } else { 11777a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1178b2b77a04SHong Zhang switch (ndim) { 1179b2b77a04SHong Zhang case 1: 11803c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11813c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1182e5d7f247SAmlan Barua #else 11837a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 118426fbe8dcSKarl Rupp 11856882372aSHong Zhang n = (PetscInt)local_n0; 11869496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11879496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1188e5d7f247SAmlan Barua #endif 1189b2b77a04SHong Zhang break; 1190b2b77a04SHong Zhang case 2: 11915b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 11927a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1193b2b77a04SHong Zhang /* 1194b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11950ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1196b2b77a04SHong Zhang */ 1197b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1198b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11995b3e41ffSAmlan Barua #else 1200c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120126fbe8dcSKarl Rupp 12025b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1203c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12045b3e41ffSAmlan Barua #endif 1205b2b77a04SHong Zhang break; 1206b2b77a04SHong Zhang case 3: 120751d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12087a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 120926fbe8dcSKarl Rupp 12106882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12116882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 121251d1eed7SAmlan Barua #else 1213c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 121426fbe8dcSKarl Rupp 121551d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1216c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 121751d1eed7SAmlan Barua #endif 1218b2b77a04SHong Zhang break; 1219b2b77a04SHong Zhang default: 1220b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12217a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 122226fbe8dcSKarl Rupp 12236882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12246882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1225b3a17365SAmlan Barua #else 1226b3a17365SAmlan Barua temp = pdim[ndim-1]; 122726fbe8dcSKarl Rupp 1228b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 122926fbe8dcSKarl Rupp 1230c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 123126fbe8dcSKarl Rupp 1232b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1233b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 123426fbe8dcSKarl Rupp 1235b3a17365SAmlan Barua pdim[ndim-1] = temp; 123626fbe8dcSKarl Rupp 1237c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1238b3a17365SAmlan Barua #endif 1239b2b77a04SHong Zhang break; 1240b2b77a04SHong Zhang } 1241b2b77a04SHong Zhang } 1242b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1243b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1244b2b77a04SHong Zhang fft->data = (void*)fftw; 1245b2b77a04SHong Zhang 1246b2b77a04SHong Zhang fft->n = n; 12470c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1248e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 124926fbe8dcSKarl Rupp 12505e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12518c1d5ab3SHong Zhang if (size == 1) { 1252a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1253a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1254a1b6d50cSHong Zhang #else 12558c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1256a1b6d50cSHong Zhang #endif 12578c1d5ab3SHong Zhang } 125826fbe8dcSKarl Rupp 1259b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1260c92418ccSAmlan Barua 1261b2b77a04SHong Zhang fftw->p_forward = 0; 1262b2b77a04SHong Zhang fftw->p_backward = 0; 1263b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1264b2b77a04SHong Zhang 1265b2b77a04SHong Zhang if (size == 1) { 1266b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1267b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1268b2b77a04SHong Zhang } else { 1269b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1270b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1271b2b77a04SHong Zhang } 1272b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1273b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12744a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 127526fbe8dcSKarl Rupp 1276bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1277bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1278bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1279b2b77a04SHong Zhang 1280b2b77a04SHong Zhang /* get runtime options */ 1281ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12825274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12835274ed1bSHong Zhang if (flg) { 12845274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12855274ed1bSHong Zhang } 12864a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1287b2b77a04SHong Zhang PetscFunctionReturn(0); 1288b2b77a04SHong Zhang } 12893c3a9c75SAmlan Barua 12903c3a9c75SAmlan Barua 12913c3a9c75SAmlan Barua 12923c3a9c75SAmlan Barua 1293