1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 148c1d5ab3SHong Zhang fftw_iodim *iodims; 15e5d7f247SAmlan Barua PetscInt partial_dim; 16b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 17b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 18b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 19b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 20b2b77a04SHong Zhang } Mat_FFTW; 21b2b77a04SHong Zhang 22b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 26b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 27b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 28b2b77a04SHong Zhang 2997504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3097504071SAmlan Barua Input parameter: 313564f093SHong Zhang A - the matrix 3297504071SAmlan Barua x - the vector on which FDFT will be performed 3397504071SAmlan Barua 3497504071SAmlan Barua Output parameter: 3597504071SAmlan Barua y - vector that stores result of FDFT 3697504071SAmlan Barua */ 37b2b77a04SHong Zhang #undef __FUNCT__ 38b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 39b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 40b2b77a04SHong Zhang { 41b2b77a04SHong Zhang PetscErrorCode ierr; 42b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 43b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 44b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 458c1d5ab3SHong Zhang fftw_iodim *iodims; 46bb5bf6f6SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim,i; 47b2b77a04SHong Zhang 48b2b77a04SHong Zhang PetscFunctionBegin; 49b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 52b2b77a04SHong Zhang switch (ndim) { 53b2b77a04SHong Zhang case 1: 5458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 55b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5658a912c5SAmlan Barua #else 5758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 5858a912c5SAmlan Barua #endif 59b2b77a04SHong Zhang break; 60b2b77a04SHong Zhang case 2: 6158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 62b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6358a912c5SAmlan Barua #else 6458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6558a912c5SAmlan Barua #endif 66b2b77a04SHong Zhang break; 67b2b77a04SHong Zhang case 3: 6858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 69b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7058a912c5SAmlan Barua #else 7158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7258a912c5SAmlan Barua #endif 73b2b77a04SHong Zhang break; 74b2b77a04SHong Zhang default: 7558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 768c1d5ab3SHong Zhang iodims = (fftw_iodim*)fftw->iodims; 778c1d5ab3SHong Zhang if (ndim) { 788c1d5ab3SHong Zhang iodims[ndim-1].n = dim[ndim-1]; 798c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 808c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 818c1d5ab3SHong Zhang iodims[i].n = dim[i]; 828c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 838c1d5ab3SHong Zhang } 848c1d5ab3SHong Zhang } 858c1d5ab3SHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8658a912c5SAmlan Barua #else 8758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8858a912c5SAmlan Barua #endif 89b2b77a04SHong Zhang break; 90b2b77a04SHong Zhang } 91b2b77a04SHong Zhang fftw->finarray = x_array; 92b2b77a04SHong Zhang fftw->foutarray = y_array; 93b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 94b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 95b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 96b2b77a04SHong Zhang } else { /* use existing plan */ 97b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 98b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 99b2b77a04SHong Zhang } else { 100b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 101b2b77a04SHong Zhang } 102b2b77a04SHong Zhang } 103b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 104b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 105b2b77a04SHong Zhang PetscFunctionReturn(0); 106b2b77a04SHong Zhang } 107b2b77a04SHong Zhang 10897504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 10997504071SAmlan Barua Input parameter: 1103564f093SHong Zhang A - the matrix 11197504071SAmlan Barua x - the vector on which BDFT will be performed 11297504071SAmlan Barua 11397504071SAmlan Barua Output parameter: 11497504071SAmlan Barua y - vector that stores result of BDFT 11597504071SAmlan Barua */ 11697504071SAmlan Barua 117b2b77a04SHong Zhang #undef __FUNCT__ 118b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 119b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 120b2b77a04SHong Zhang { 121b2b77a04SHong Zhang PetscErrorCode ierr; 122b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 123b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 124b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 125b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1268c1d5ab3SHong Zhang fftw_iodim *iodims=(fftw_iodim*)fftw->iodims; 127b2b77a04SHong Zhang 128b2b77a04SHong Zhang PetscFunctionBegin; 129b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 130b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 131b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 132b2b77a04SHong Zhang switch (ndim) { 133b2b77a04SHong Zhang case 1: 13458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 135b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13658a912c5SAmlan Barua #else 137e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 13858a912c5SAmlan Barua #endif 139b2b77a04SHong Zhang break; 140b2b77a04SHong Zhang case 2: 14158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 142b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14358a912c5SAmlan Barua #else 144e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14558a912c5SAmlan Barua #endif 146b2b77a04SHong Zhang break; 147b2b77a04SHong Zhang case 3: 14858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 149b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 15058a912c5SAmlan Barua #else 151e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 15258a912c5SAmlan Barua #endif 153b2b77a04SHong Zhang break; 154b2b77a04SHong Zhang default: 15558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1568c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 15758a912c5SAmlan Barua #else 158e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 15958a912c5SAmlan Barua #endif 160b2b77a04SHong Zhang break; 161b2b77a04SHong Zhang } 162b2b77a04SHong Zhang fftw->binarray = x_array; 163b2b77a04SHong Zhang fftw->boutarray = y_array; 164b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 165b2b77a04SHong Zhang } else { /* use existing plan */ 166b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 167b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 168b2b77a04SHong Zhang } else { 169b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 170b2b77a04SHong Zhang } 171b2b77a04SHong Zhang } 172b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 173b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 174b2b77a04SHong Zhang PetscFunctionReturn(0); 175b2b77a04SHong Zhang } 176b2b77a04SHong Zhang 17797504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 17897504071SAmlan Barua Input parameter: 1793564f093SHong Zhang A - the matrix 18097504071SAmlan Barua x - the vector on which FDFT will be performed 18197504071SAmlan Barua 18297504071SAmlan Barua Output parameter: 18397504071SAmlan Barua y - vector that stores result of FDFT 18497504071SAmlan Barua */ 185b2b77a04SHong Zhang #undef __FUNCT__ 186b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 187b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 188b2b77a04SHong Zhang { 189b2b77a04SHong Zhang PetscErrorCode ierr; 190b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 191b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 193c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 194ce94432eSBarry Smith MPI_Comm comm; 195b2b77a04SHong Zhang 196b2b77a04SHong Zhang PetscFunctionBegin; 197ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 198b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 199b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 200b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 201b2b77a04SHong Zhang switch (ndim) { 202b2b77a04SHong Zhang case 1: 2033c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 204b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 205ae0a50aaSHong Zhang #else 2064f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2073c3a9c75SAmlan Barua #endif 208b2b77a04SHong Zhang break; 209b2b77a04SHong Zhang case 2: 21097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 211b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2123c3a9c75SAmlan Barua #else 2133c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2143c3a9c75SAmlan Barua #endif 215b2b77a04SHong Zhang break; 216b2b77a04SHong Zhang case 3: 2173c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 218b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2193c3a9c75SAmlan Barua #else 22051d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2213c3a9c75SAmlan Barua #endif 222b2b77a04SHong Zhang break; 223b2b77a04SHong Zhang default: 2243c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 225c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2263c3a9c75SAmlan Barua #else 2273c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2283c3a9c75SAmlan Barua #endif 229b2b77a04SHong Zhang break; 230b2b77a04SHong Zhang } 231b2b77a04SHong Zhang fftw->finarray = x_array; 232b2b77a04SHong Zhang fftw->foutarray = y_array; 233b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 234b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 235b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 236b2b77a04SHong Zhang } else { /* use existing plan */ 237b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 238b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 239b2b77a04SHong Zhang } else { 240b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 241b2b77a04SHong Zhang } 242b2b77a04SHong Zhang } 243b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 244b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 245b2b77a04SHong Zhang PetscFunctionReturn(0); 246b2b77a04SHong Zhang } 247b2b77a04SHong Zhang 24897504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 24997504071SAmlan Barua Input parameter: 2503564f093SHong Zhang A - the matrix 25197504071SAmlan Barua x - the vector on which BDFT will be performed 25297504071SAmlan Barua 25397504071SAmlan Barua Output parameter: 25497504071SAmlan Barua y - vector that stores result of BDFT 25597504071SAmlan Barua */ 256b2b77a04SHong Zhang #undef __FUNCT__ 257b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 258b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 259b2b77a04SHong Zhang { 260b2b77a04SHong Zhang PetscErrorCode ierr; 261b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 262b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 263b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 264c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 265ce94432eSBarry Smith MPI_Comm comm; 266b2b77a04SHong Zhang 267b2b77a04SHong Zhang PetscFunctionBegin; 268ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 269b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 270b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 271b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 272b2b77a04SHong Zhang switch (ndim) { 273b2b77a04SHong Zhang case 1: 2743c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 275b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 276ae0a50aaSHong Zhang #else 2774f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2783c3a9c75SAmlan Barua #endif 279b2b77a04SHong Zhang break; 280b2b77a04SHong Zhang case 2: 28197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 282b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2833c3a9c75SAmlan Barua #else 2843c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2853c3a9c75SAmlan Barua #endif 286b2b77a04SHong Zhang break; 287b2b77a04SHong Zhang case 3: 2883c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 289b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2903c3a9c75SAmlan Barua #else 2913c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2923c3a9c75SAmlan Barua #endif 293b2b77a04SHong Zhang break; 294b2b77a04SHong Zhang default: 2953c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 296c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2973c3a9c75SAmlan Barua #else 2983c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2993c3a9c75SAmlan Barua #endif 300b2b77a04SHong Zhang break; 301b2b77a04SHong Zhang } 302b2b77a04SHong Zhang fftw->binarray = x_array; 303b2b77a04SHong Zhang fftw->boutarray = y_array; 304b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305b2b77a04SHong Zhang } else { /* use existing plan */ 306b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 307b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 308b2b77a04SHong Zhang } else { 309b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 310b2b77a04SHong Zhang } 311b2b77a04SHong Zhang } 312b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 313b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 314b2b77a04SHong Zhang PetscFunctionReturn(0); 315b2b77a04SHong Zhang } 316b2b77a04SHong Zhang 317b2b77a04SHong Zhang #undef __FUNCT__ 318b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 319b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 320b2b77a04SHong Zhang { 321b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 322b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 323b2b77a04SHong Zhang PetscErrorCode ierr; 324b2b77a04SHong Zhang 325b2b77a04SHong Zhang PetscFunctionBegin; 326b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 327b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3288c1d5ab3SHong Zhang if (fftw->iodims) { 3298c1d5ab3SHong Zhang /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */ 3308c1d5ab3SHong Zhang free(fftw->iodims); 3318c1d5ab3SHong Zhang } 332bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 333bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3346ccf0b3eSHong Zhang fftw_mpi_cleanup(); 335b2b77a04SHong Zhang PetscFunctionReturn(0); 336b2b77a04SHong Zhang } 337b2b77a04SHong Zhang 338c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 339b2b77a04SHong Zhang #undef __FUNCT__ 340b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 341b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 342b2b77a04SHong Zhang { 343b2b77a04SHong Zhang PetscErrorCode ierr; 344b2b77a04SHong Zhang PetscScalar *array; 345b2b77a04SHong Zhang 346b2b77a04SHong Zhang PetscFunctionBegin; 347b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 348b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 349b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 350b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 351b2b77a04SHong Zhang PetscFunctionReturn(0); 352b2b77a04SHong Zhang } 353b2b77a04SHong Zhang 3544f7415efSAmlan Barua #undef __FUNCT__ 3554be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3564be45526SHong Zhang /*@ 357b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3584f7415efSAmlan Barua parallel layout determined by FFTW 3594f7415efSAmlan Barua 3604f7415efSAmlan Barua Collective on Mat 3614f7415efSAmlan Barua 3624f7415efSAmlan Barua Input Parameter: 3633564f093SHong Zhang . A - the matrix 3644f7415efSAmlan Barua 3654f7415efSAmlan Barua Output Parameter: 366cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 367cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 368cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3694f7415efSAmlan Barua 3704f7415efSAmlan Barua Level: advanced 3713564f093SHong Zhang 37297504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 37397504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 37497504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 37597504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 37697504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 37797504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 378e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 379e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 380e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 381e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 382e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 383e0ec536eSAmlan Barua each processor and returns that. 3844f7415efSAmlan Barua 3854f7415efSAmlan Barua .seealso: MatCreateFFTW() 3864be45526SHong Zhang @*/ 3874be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3884be45526SHong Zhang { 3894be45526SHong Zhang PetscErrorCode ierr; 390b4c3921fSHong Zhang 3914be45526SHong Zhang PetscFunctionBegin; 3924be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3934be45526SHong Zhang PetscFunctionReturn(0); 3944be45526SHong Zhang } 3954be45526SHong Zhang 3964be45526SHong Zhang #undef __FUNCT__ 3974be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3984be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3994f7415efSAmlan Barua { 4004f7415efSAmlan Barua PetscErrorCode ierr; 4014f7415efSAmlan Barua PetscMPIInt size,rank; 402ce94432eSBarry Smith MPI_Comm comm; 4034f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4044f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4059496c9aeSAmlan Barua PetscInt N = fft->N; 4064f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4074f7415efSAmlan Barua 4084f7415efSAmlan Barua PetscFunctionBegin; 4094f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4104f7415efSAmlan Barua PetscValidType(A,1); 411ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4124f7415efSAmlan Barua 4134f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4144f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4154f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4164f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4174f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4184f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4194f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4204f7415efSAmlan Barua #else 4218ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4228ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4238ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4244f7415efSAmlan Barua #endif 42597504071SAmlan Barua } else { /* parallel cases */ 4264f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4279496c9aeSAmlan Barua ptrdiff_t local_n1; 4289496c9aeSAmlan Barua fftw_complex *data_fout; 4299496c9aeSAmlan Barua ptrdiff_t local_1_start; 4309496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4319496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4329496c9aeSAmlan Barua #else 4334f7415efSAmlan Barua double *data_finr,*data_boutr; 4346ccf0b3eSHong Zhang PetscInt n1,N1; 4359496c9aeSAmlan Barua ptrdiff_t temp; 4369496c9aeSAmlan Barua #endif 4379496c9aeSAmlan Barua 4384f7415efSAmlan Barua switch (ndim) { 4394f7415efSAmlan Barua case 1: 44057625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4414f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 44257625b84SAmlan Barua #else 44357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44457625b84SAmlan Barua if (fin) { 44557625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 446778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 44726fbe8dcSKarl Rupp 44857625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 44957625b84SAmlan Barua } 45057625b84SAmlan Barua if (fout) { 45157625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 452778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45326fbe8dcSKarl Rupp 45457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45557625b84SAmlan Barua } 45657625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45757625b84SAmlan Barua if (bout) { 45857625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 459778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 46026fbe8dcSKarl Rupp 46157625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 46257625b84SAmlan Barua } 46357625b84SAmlan Barua break; 46457625b84SAmlan Barua #endif 4654f7415efSAmlan Barua case 2: 46697504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4674f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4684f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4694f7415efSAmlan Barua if (fin) { 4704f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 471778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 47226fbe8dcSKarl Rupp 4734f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4744f7415efSAmlan Barua } 4754f7415efSAmlan Barua if (bout) { 4764f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 477778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 47826fbe8dcSKarl Rupp 4794f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4804f7415efSAmlan Barua } 48157625b84SAmlan Barua if (fout) { 48257625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48426fbe8dcSKarl Rupp 48557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48657625b84SAmlan Barua } 4874f7415efSAmlan Barua #else 4884f7415efSAmlan Barua /* Get local size */ 4894f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4904f7415efSAmlan Barua if (fin) { 4914f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 49326fbe8dcSKarl Rupp 4944f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4954f7415efSAmlan Barua } 4964f7415efSAmlan Barua if (fout) { 4974f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 498778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 49926fbe8dcSKarl Rupp 5004f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5014f7415efSAmlan Barua } 5024f7415efSAmlan Barua if (bout) { 5034f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 504778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 50526fbe8dcSKarl Rupp 5064f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5074f7415efSAmlan Barua } 5084f7415efSAmlan Barua #endif 5094f7415efSAmlan Barua break; 5104f7415efSAmlan Barua case 3: 5114f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5124f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5134f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5144f7415efSAmlan Barua if (fin) { 5154f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 516778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 51726fbe8dcSKarl Rupp 5184f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5194f7415efSAmlan Barua } 5204f7415efSAmlan Barua if (bout) { 5214f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 522778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 52326fbe8dcSKarl Rupp 5244f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5254f7415efSAmlan Barua } 5263564f093SHong Zhang 52757625b84SAmlan Barua if (fout) { 52857625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 529778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 53026fbe8dcSKarl Rupp 53157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53257625b84SAmlan Barua } 5334f7415efSAmlan Barua #else 5340c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5350c9b18e4SAmlan Barua if (fin) { 5360c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 53826fbe8dcSKarl Rupp 5390c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5400c9b18e4SAmlan Barua } 5410c9b18e4SAmlan Barua if (fout) { 5420c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 54426fbe8dcSKarl Rupp 5450c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5460c9b18e4SAmlan Barua } 5470c9b18e4SAmlan Barua if (bout) { 5480c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 549778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 55026fbe8dcSKarl Rupp 5510c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5520c9b18e4SAmlan Barua } 5534f7415efSAmlan Barua #endif 5544f7415efSAmlan Barua break; 5554f7415efSAmlan Barua default: 5564f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5574f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 55826fbe8dcSKarl Rupp 5594f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 56026fbe8dcSKarl Rupp 5614f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5624f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 56326fbe8dcSKarl Rupp 5644f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5654f7415efSAmlan Barua 5664f7415efSAmlan Barua if (fin) { 5674f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 568778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 56926fbe8dcSKarl Rupp 5704f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5714f7415efSAmlan Barua } 5724f7415efSAmlan Barua if (bout) { 5734f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 574778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 57526fbe8dcSKarl Rupp 5769496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5774f7415efSAmlan Barua } 5783564f093SHong Zhang 57957625b84SAmlan Barua if (fout) { 58057625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 581778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58226fbe8dcSKarl Rupp 58357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 58457625b84SAmlan Barua } 5854f7415efSAmlan Barua #else 5860c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5870c9b18e4SAmlan Barua if (fin) { 5880c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 589778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 59026fbe8dcSKarl Rupp 5910c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5920c9b18e4SAmlan Barua } 5930c9b18e4SAmlan Barua if (fout) { 5940c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 595778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 59626fbe8dcSKarl Rupp 5970c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5980c9b18e4SAmlan Barua } 5990c9b18e4SAmlan Barua if (bout) { 6000c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 601778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 60226fbe8dcSKarl Rupp 6030c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6040c9b18e4SAmlan Barua } 6054f7415efSAmlan Barua #endif 6064f7415efSAmlan Barua break; 6074f7415efSAmlan Barua } 6084f7415efSAmlan Barua } 6094f7415efSAmlan Barua PetscFunctionReturn(0); 6104f7415efSAmlan Barua } 6114f7415efSAmlan Barua 612c92418ccSAmlan Barua #undef __FUNCT__ 61374a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6143564f093SHong Zhang /*@ 6153564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 61654efbe56SHong Zhang 6173564f093SHong Zhang Collective on Mat 6183564f093SHong Zhang 6193564f093SHong Zhang Input Parameters: 6203564f093SHong Zhang + A - FFTW matrix 6213564f093SHong Zhang - x - the PETSc vector 6223564f093SHong Zhang 6233564f093SHong Zhang Output Parameters: 6243564f093SHong Zhang . y - the FFTW vector 6253564f093SHong Zhang 626b2b77a04SHong Zhang Options Database Keys: 6273564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 628b2b77a04SHong Zhang 629b2b77a04SHong Zhang Level: intermediate 6303564f093SHong Zhang 63197504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 63297504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 63397504071SAmlan Barua zeros. This routine does that job by scattering operation. 63497504071SAmlan Barua 6353564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6363564f093SHong Zhang @*/ 6373564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6383564f093SHong Zhang { 6393564f093SHong Zhang PetscErrorCode ierr; 640b2b77a04SHong Zhang 6413564f093SHong Zhang PetscFunctionBegin; 6423564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6433564f093SHong Zhang PetscFunctionReturn(0); 6443564f093SHong Zhang } 6453c3a9c75SAmlan Barua 6463c3a9c75SAmlan Barua #undef __FUNCT__ 6471986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 64874a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6493c3a9c75SAmlan Barua { 6503c3a9c75SAmlan Barua PetscErrorCode ierr; 651ce94432eSBarry Smith MPI_Comm comm; 6523c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6533c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6549496c9aeSAmlan Barua PetscInt N =fft->N; 655b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6569496c9aeSAmlan Barua PetscInt low; 6577a21806cSHong Zhang PetscMPIInt rank,size; 6587a21806cSHong Zhang PetscInt vsize,vsize1; 6597a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6609496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6613c3a9c75SAmlan Barua VecScatter vecscat; 6623c3a9c75SAmlan Barua IS list1,list2; 6639496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6649496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6659496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6669496c9aeSAmlan Barua PetscInt N1, n1,NM; 6679496c9aeSAmlan Barua ptrdiff_t temp; 6689496c9aeSAmlan Barua #endif 6693c3a9c75SAmlan Barua 6703564f093SHong Zhang PetscFunctionBegin; 671ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 672f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 673f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6740298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 6753c3a9c75SAmlan Barua 6763564f093SHong Zhang if (size==1) { 6778ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6788ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6799496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6806971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6816971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6826971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6836971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 684b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6853564f093SHong Zhang } else { 6863c3a9c75SAmlan Barua switch (ndim) { 6873c3a9c75SAmlan Barua case 1: 68864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 6897a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 69026fbe8dcSKarl Rupp 6914f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6924f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 69364657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 69464657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69564657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69664657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 69764657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 69864657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 69964657d84SAmlan Barua #else 700e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 70164657d84SAmlan Barua #endif 7023c3a9c75SAmlan Barua break; 7033c3a9c75SAmlan Barua case 2: 704bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7057a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 70626fbe8dcSKarl Rupp 7074f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7084f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 709bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 710bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 711bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 713bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 714bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 715bd59e6c6SAmlan Barua #else 716*c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 71726fbe8dcSKarl Rupp 7183c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7199496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7209496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7213c3a9c75SAmlan Barua 7223564f093SHong Zhang if (dim[1]%2==0) { 7233c3a9c75SAmlan Barua NM = dim[1]+2; 7243564f093SHong Zhang } else { 7253c3a9c75SAmlan Barua NM = dim[1]+1; 7263564f093SHong Zhang } 7273c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7283c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7293c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7303c3a9c75SAmlan Barua tempindx1 = i*NM + j; 73126fbe8dcSKarl Rupp 7325b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7333c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7343c3a9c75SAmlan Barua } 7353c3a9c75SAmlan Barua } 7363c3a9c75SAmlan Barua 7373c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7383c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7393c3a9c75SAmlan Barua 740f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 741f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 744b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 745b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 746b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 747b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 748bd59e6c6SAmlan Barua #endif 7499496c9aeSAmlan Barua break; 7503c3a9c75SAmlan Barua 7513c3a9c75SAmlan Barua case 3: 752bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7537a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 75426fbe8dcSKarl Rupp 7554f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7564f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 757bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 758bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 759bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 761bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 762bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 763bd59e6c6SAmlan Barua #else 7647a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 76526fbe8dcSKarl Rupp 76651d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 76751d1eed7SAmlan Barua 7689496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7699496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 77051d1eed7SAmlan Barua 771db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 772db4deed7SKarl Rupp else NM = dim[2]+1; 77351d1eed7SAmlan Barua 77451d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 77551d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 77651d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 77751d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 77851d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 77926fbe8dcSKarl Rupp 78051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 78151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 78251d1eed7SAmlan Barua } 78351d1eed7SAmlan Barua } 78451d1eed7SAmlan Barua } 78551d1eed7SAmlan Barua 78651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 78751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 78851d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 78951d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79051d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79151d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 792b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 793b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 794b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 795b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 796bd59e6c6SAmlan Barua #endif 7979496c9aeSAmlan Barua break; 7983c3a9c75SAmlan Barua 7993c3a9c75SAmlan Barua default: 800bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8017a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 80226fbe8dcSKarl Rupp 8034f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8044f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 805bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 806bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 808bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 809bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 810bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 811bd59e6c6SAmlan Barua #else 812e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 81326fbe8dcSKarl Rupp 814e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 81526fbe8dcSKarl Rupp 8167a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 81726fbe8dcSKarl Rupp 818e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 81926fbe8dcSKarl Rupp 820e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 821e5d7f247SAmlan Barua 822e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 823e5d7f247SAmlan Barua 824e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 825e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 826e5d7f247SAmlan Barua 827db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 828db4deed7SKarl Rupp else NM = 1; 829e5d7f247SAmlan Barua 8306971673cSAmlan Barua j = low; 8313564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8326971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8336971673cSAmlan Barua indx2[i] = j; 83426fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8356971673cSAmlan Barua j++; 8366971673cSAmlan Barua } 8376971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8386971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8396971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8406971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8416971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8426971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 843b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 844b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 845b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 846b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 847bd59e6c6SAmlan Barua #endif 8489496c9aeSAmlan Barua break; 8493c3a9c75SAmlan Barua } 850e81bb599SAmlan Barua } 8513564f093SHong Zhang PetscFunctionReturn(0); 8523c3a9c75SAmlan Barua } 8533c3a9c75SAmlan Barua 8543c3a9c75SAmlan Barua #undef __FUNCT__ 85574a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8563564f093SHong Zhang /*@ 8573564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8583564f093SHong Zhang 8593564f093SHong Zhang Collective on Mat 8603564f093SHong Zhang 8613564f093SHong Zhang Input Parameters: 8623564f093SHong Zhang + A - FFTW matrix 8633564f093SHong Zhang - x - FFTW vector 8643564f093SHong Zhang 8653564f093SHong Zhang Output Parameters: 8663564f093SHong Zhang . y - PETSc vector 8673564f093SHong Zhang 8683564f093SHong Zhang Level: intermediate 8693564f093SHong Zhang 8703564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8713564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8723564f093SHong Zhang 8733564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8743564f093SHong Zhang @*/ 87574a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8763c3a9c75SAmlan Barua { 8773c3a9c75SAmlan Barua PetscErrorCode ierr; 8785fd66863SKarl Rupp 8793c3a9c75SAmlan Barua PetscFunctionBegin; 88074a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8813c3a9c75SAmlan Barua PetscFunctionReturn(0); 8823c3a9c75SAmlan Barua } 88354efbe56SHong Zhang 8843c3a9c75SAmlan Barua #undef __FUNCT__ 88574a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 88674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8875b3e41ffSAmlan Barua { 8885b3e41ffSAmlan Barua PetscErrorCode ierr; 889ce94432eSBarry Smith MPI_Comm comm; 8905b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8915b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8929496c9aeSAmlan Barua PetscInt N = fft->N; 893b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 8949496c9aeSAmlan Barua PetscInt low; 8957a21806cSHong Zhang PetscMPIInt rank,size; 8967a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 8979496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8985b3e41ffSAmlan Barua VecScatter vecscat; 8995b3e41ffSAmlan Barua IS list1,list2; 9009496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9019496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9029496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 9039496c9aeSAmlan Barua PetscInt N1, n1,NM; 9049496c9aeSAmlan Barua ptrdiff_t temp; 9059496c9aeSAmlan Barua #endif 9069496c9aeSAmlan Barua 9073564f093SHong Zhang PetscFunctionBegin; 908ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9095b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9105b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9110298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9125b3e41ffSAmlan Barua 913e81bb599SAmlan Barua if (size==1) { 914b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9156971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9166971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9176971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9186971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 919b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 920e81bb599SAmlan Barua 9213564f093SHong Zhang } else { 922e81bb599SAmlan Barua switch (ndim) { 923e81bb599SAmlan Barua case 1: 92464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9257a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 92626fbe8dcSKarl Rupp 9274f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9284f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 92964657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 93064657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93164657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93264657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 93364657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 93464657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 93564657d84SAmlan Barua #else 9366a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 93764657d84SAmlan Barua #endif 9385b3e41ffSAmlan Barua break; 9395b3e41ffSAmlan Barua case 2: 940bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9417a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 94226fbe8dcSKarl Rupp 9434f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9444f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 945bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 946bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 949bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 950bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 951bd59e6c6SAmlan Barua #else 9527a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 95326fbe8dcSKarl Rupp 9545b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9555b3e41ffSAmlan Barua 9569496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9579496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9585b3e41ffSAmlan Barua 959db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 960db4deed7SKarl Rupp else NM = dim[1]+1; 9615b3e41ffSAmlan Barua 9625b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9635b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9645b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9655b3e41ffSAmlan Barua tempindx1 = i*NM + j; 96626fbe8dcSKarl Rupp 9675b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9685b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9695b3e41ffSAmlan Barua } 9705b3e41ffSAmlan Barua } 9715b3e41ffSAmlan Barua 9725b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9735b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9745b3e41ffSAmlan Barua 9755b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9765b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9775b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9785b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 979b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 980b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 981b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 982b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 983bd59e6c6SAmlan Barua #endif 9849496c9aeSAmlan Barua break; 9855b3e41ffSAmlan Barua case 3: 986bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9877a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 98826fbe8dcSKarl Rupp 9894f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9904f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 991bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 996bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 997bd59e6c6SAmlan Barua #else 998bd59e6c6SAmlan Barua 9997a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 100026fbe8dcSKarl Rupp 100151d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 100251d1eed7SAmlan Barua 10039496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10049496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 100551d1eed7SAmlan Barua 1006db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1007db4deed7SKarl Rupp else NM = dim[2]+1; 100851d1eed7SAmlan Barua 100951d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 101051d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 101151d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 101251d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 101351d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 101426fbe8dcSKarl Rupp 101551d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 101651d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 101751d1eed7SAmlan Barua } 101851d1eed7SAmlan Barua } 101951d1eed7SAmlan Barua } 102051d1eed7SAmlan Barua 102151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 102251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 102351d1eed7SAmlan Barua 102451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 102551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 102651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 102751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1031b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1032bd59e6c6SAmlan Barua #endif 10339496c9aeSAmlan Barua break; 10345b3e41ffSAmlan Barua default: 1035bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10367a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 103726fbe8dcSKarl Rupp 10384f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10394f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1040bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1041bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1044bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1045bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1046bd59e6c6SAmlan Barua #else 1047ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 104826fbe8dcSKarl Rupp 1049ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 105026fbe8dcSKarl Rupp 1051*c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 105226fbe8dcSKarl Rupp 1053ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 105426fbe8dcSKarl Rupp 1055ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1056ba6e06d0SAmlan Barua 1057ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1058ba6e06d0SAmlan Barua 1059ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1060ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1061ba6e06d0SAmlan Barua 1062db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1063db4deed7SKarl Rupp else NM = 1; 1064ba6e06d0SAmlan Barua 1065ba6e06d0SAmlan Barua j = low; 10663564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1067ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1068ba6e06d0SAmlan Barua indx2[i] = j; 10693564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1070ba6e06d0SAmlan Barua j++; 1071ba6e06d0SAmlan Barua } 1072ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1073ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1074ba6e06d0SAmlan Barua 1075ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1076ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1077ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1078ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1079b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1080b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1081b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1082b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1083bd59e6c6SAmlan Barua #endif 10849496c9aeSAmlan Barua break; 10855b3e41ffSAmlan Barua } 1086e81bb599SAmlan Barua } 10873564f093SHong Zhang PetscFunctionReturn(0); 10885b3e41ffSAmlan Barua } 10895b3e41ffSAmlan Barua 10905b3e41ffSAmlan Barua #undef __FUNCT__ 10913c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10923c3a9c75SAmlan Barua /* 10933564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10943564f093SHong Zhang 10953c3a9c75SAmlan Barua Options Database Keys: 10963c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10973c3a9c75SAmlan Barua 10983c3a9c75SAmlan Barua Level: intermediate 10993c3a9c75SAmlan Barua 11003c3a9c75SAmlan Barua */ 11018cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1102b2b77a04SHong Zhang { 1103b2b77a04SHong Zhang PetscErrorCode ierr; 1104ce94432eSBarry Smith MPI_Comm comm; 1105b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1106b2b77a04SHong Zhang Mat_FFTW *fftw; 1107b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11085274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11095274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1110b2b77a04SHong Zhang PetscBool flg; 11114f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 111211902ff2SHong Zhang PetscMPIInt size,rank; 11139496c9aeSAmlan Barua ptrdiff_t *pdim; 11149496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11159496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11169496c9aeSAmlan Barua ptrdiff_t temp; 11178ca4c034SAmlan Barua PetscInt N1,tot_dim; 11184f48bc67SAmlan Barua #else 11194f48bc67SAmlan Barua PetscInt n1; 11209496c9aeSAmlan Barua #endif 11219496c9aeSAmlan Barua 1122b2b77a04SHong Zhang PetscFunctionBegin; 1123ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1124b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 112511902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1126c92418ccSAmlan Barua 11271878d478SAmlan Barua fftw_mpi_init(); 112811902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 112911902ff2SHong Zhang pdim[0] = dim[0]; 11308ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11318ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11328ca4c034SAmlan Barua #endif 11333564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11346882372aSHong Zhang partial_dim *= dim[ctr]; 113511902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11368ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1137db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1138db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11398ca4c034SAmlan Barua #endif 11406882372aSHong Zhang } 11413c3a9c75SAmlan Barua 1142b2b77a04SHong Zhang if (size == 1) { 1143e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1144b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1145b2b77a04SHong Zhang n = N; 1146e81bb599SAmlan Barua #else 1147e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1148e81bb599SAmlan Barua n = tot_dim; 1149e81bb599SAmlan Barua #endif 1150e81bb599SAmlan Barua 1151b2b77a04SHong Zhang } else { 11527a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1153b2b77a04SHong Zhang switch (ndim) { 1154b2b77a04SHong Zhang case 1: 11553c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11563c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1157e5d7f247SAmlan Barua #else 11587a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 115926fbe8dcSKarl Rupp 11606882372aSHong Zhang n = (PetscInt)local_n0; 11619496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11629496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1163e5d7f247SAmlan Barua #endif 1164b2b77a04SHong Zhang break; 1165b2b77a04SHong Zhang case 2: 11665b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 11677a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1168b2b77a04SHong Zhang /* 1169b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11700ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1171b2b77a04SHong Zhang */ 1172b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1173b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11745b3e41ffSAmlan Barua #else 1175*c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 117626fbe8dcSKarl Rupp 11775b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1178c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11795b3e41ffSAmlan Barua #endif 1180b2b77a04SHong Zhang break; 1181b2b77a04SHong Zhang case 3: 118251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11837a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 118426fbe8dcSKarl Rupp 11856882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11866882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 118751d1eed7SAmlan Barua #else 1188*c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 118926fbe8dcSKarl Rupp 119051d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1191c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 119251d1eed7SAmlan Barua #endif 1193b2b77a04SHong Zhang break; 1194b2b77a04SHong Zhang default: 1195b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11967a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 119726fbe8dcSKarl Rupp 11986882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11996882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1200b3a17365SAmlan Barua #else 1201b3a17365SAmlan Barua temp = pdim[ndim-1]; 120226fbe8dcSKarl Rupp 1203b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 120426fbe8dcSKarl Rupp 1205*c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120626fbe8dcSKarl Rupp 1207b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1208b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 120926fbe8dcSKarl Rupp 1210b3a17365SAmlan Barua pdim[ndim-1] = temp; 121126fbe8dcSKarl Rupp 1212c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1213b3a17365SAmlan Barua #endif 1214b2b77a04SHong Zhang break; 1215b2b77a04SHong Zhang } 1216b2b77a04SHong Zhang } 1217b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1218b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1219b2b77a04SHong Zhang fft->data = (void*)fftw; 1220b2b77a04SHong Zhang 1221b2b77a04SHong Zhang fft->n = n; 12220c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1223e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 122426fbe8dcSKarl Rupp 12255e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12268c1d5ab3SHong Zhang if (size == 1) { 12278c1d5ab3SHong Zhang /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr); error! */ 12288c1d5ab3SHong Zhang /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */ 12298c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 12308c1d5ab3SHong Zhang } 123126fbe8dcSKarl Rupp 1232b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1233c92418ccSAmlan Barua 1234b2b77a04SHong Zhang fftw->p_forward = 0; 1235b2b77a04SHong Zhang fftw->p_backward = 0; 1236b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1237b2b77a04SHong Zhang 1238b2b77a04SHong Zhang if (size == 1) { 1239b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1240b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1241b2b77a04SHong Zhang } else { 1242b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1243b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1244b2b77a04SHong Zhang } 1245b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1246b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12474a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 124826fbe8dcSKarl Rupp 1249bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1250bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1251bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1252b2b77a04SHong Zhang 1253b2b77a04SHong Zhang /* get runtime options */ 1254ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12555274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12565274ed1bSHong Zhang if (flg) { 12575274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12585274ed1bSHong Zhang } 12594a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1260b2b77a04SHong Zhang PetscFunctionReturn(0); 1261b2b77a04SHong Zhang } 12623c3a9c75SAmlan Barua 12633c3a9c75SAmlan Barua 12643c3a9c75SAmlan Barua 12653c3a9c75SAmlan Barua 1266