1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14e5d7f247SAmlan Barua PetscInt partial_dim; 15b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 16b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 19b2b77a04SHong Zhang } Mat_FFTW; 20b2b77a04SHong Zhang 21b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 26b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27b2b77a04SHong Zhang 2897504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 2997504071SAmlan Barua Input parameter: 303564f093SHong Zhang A - the matrix 3197504071SAmlan Barua x - the vector on which FDFT will be performed 3297504071SAmlan Barua 3397504071SAmlan Barua Output parameter: 3497504071SAmlan Barua y - vector that stores result of FDFT 3597504071SAmlan Barua */ 36b2b77a04SHong Zhang #undef __FUNCT__ 37b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 38b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39b2b77a04SHong Zhang { 40b2b77a04SHong Zhang PetscErrorCode ierr; 41b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 42b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 44b2b77a04SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 45*7a21806cSHong Zhang int *dim_32,i; 46b2b77a04SHong Zhang 47b2b77a04SHong Zhang PetscFunctionBegin; 48*7a21806cSHong Zhang ierr = PetscMalloc(ndim*sizeof(int),&dim_32); 49*7a21806cSHong Zhang for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 50*7a21806cSHong Zhang 51b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 52b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 53b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 54b2b77a04SHong Zhang switch (ndim) { 55b2b77a04SHong Zhang case 1: 5658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 57b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 5858a912c5SAmlan Barua #else 5958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6058a912c5SAmlan Barua #endif 61b2b77a04SHong Zhang break; 62b2b77a04SHong Zhang case 2: 6358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 64b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6558a912c5SAmlan Barua #else 6658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6758a912c5SAmlan Barua #endif 68b2b77a04SHong Zhang break; 69b2b77a04SHong Zhang case 3: 7058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 71b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 72*7a21806cSHong Zhang //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE); 7358a912c5SAmlan Barua #else 7458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7558a912c5SAmlan Barua #endif 76b2b77a04SHong Zhang break; 77b2b77a04SHong Zhang default: 7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 79*7a21806cSHong Zhang fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 805274ed1bSHong Zhang /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */ 8158a912c5SAmlan Barua #else 8258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8358a912c5SAmlan Barua #endif 84b2b77a04SHong Zhang break; 85b2b77a04SHong Zhang } 86b2b77a04SHong Zhang fftw->finarray = x_array; 87b2b77a04SHong Zhang fftw->foutarray = y_array; 88b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 89b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 90b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 91b2b77a04SHong Zhang } else { /* use existing plan */ 92b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 93b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 94b2b77a04SHong Zhang } else { 95b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 96b2b77a04SHong Zhang } 97b2b77a04SHong Zhang } 98b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 99b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 100*7a21806cSHong Zhang ierr = PetscFree(dim_32);CHKERRQ(ierr); 101b2b77a04SHong Zhang PetscFunctionReturn(0); 102b2b77a04SHong Zhang } 103b2b77a04SHong Zhang 10497504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 10597504071SAmlan Barua Input parameter: 1063564f093SHong Zhang A - the matrix 10797504071SAmlan Barua x - the vector on which BDFT will be performed 10897504071SAmlan Barua 10997504071SAmlan Barua Output parameter: 11097504071SAmlan Barua y - vector that stores result of BDFT 11197504071SAmlan Barua */ 11297504071SAmlan Barua 113b2b77a04SHong Zhang #undef __FUNCT__ 114b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 115b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 116b2b77a04SHong Zhang { 117b2b77a04SHong Zhang PetscErrorCode ierr; 118b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 119b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 120b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 121b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 122*7a21806cSHong Zhang int *dim_32,i; /* for 64-bit */ 123b2b77a04SHong Zhang 124b2b77a04SHong Zhang PetscFunctionBegin; 125*7a21806cSHong Zhang ierr = PetscMalloc(ndim*sizeof(int),&dim_32); 126*7a21806cSHong Zhang for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i]; 127*7a21806cSHong Zhang 128b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 129b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 130b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 131b2b77a04SHong Zhang switch (ndim) { 132b2b77a04SHong Zhang case 1: 13358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 134b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 13558a912c5SAmlan Barua #else 136e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 13758a912c5SAmlan Barua #endif 138b2b77a04SHong Zhang break; 139b2b77a04SHong Zhang case 2: 14058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 141b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 14258a912c5SAmlan Barua #else 143e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 14458a912c5SAmlan Barua #endif 145b2b77a04SHong Zhang break; 146b2b77a04SHong Zhang case 3: 14758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 148b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149*7a21806cSHong Zhang //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE); 15058a912c5SAmlan Barua #else 151e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 15258a912c5SAmlan Barua #endif 153b2b77a04SHong Zhang break; 154b2b77a04SHong Zhang default: 15558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 156*7a21806cSHong Zhang fftw->p_backward = fftw_plan_dft((int)ndim,dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 15758a912c5SAmlan Barua #else 158e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 15958a912c5SAmlan Barua #endif 160b2b77a04SHong Zhang break; 161b2b77a04SHong Zhang } 162b2b77a04SHong Zhang fftw->binarray = x_array; 163b2b77a04SHong Zhang fftw->boutarray = y_array; 164b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 165b2b77a04SHong Zhang } else { /* use existing plan */ 166b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 167b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 168b2b77a04SHong Zhang } else { 169b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 170b2b77a04SHong Zhang } 171b2b77a04SHong Zhang } 172b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 173b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 174*7a21806cSHong Zhang ierr = PetscFree(dim_32);CHKERRQ(ierr); 175b2b77a04SHong Zhang PetscFunctionReturn(0); 176b2b77a04SHong Zhang } 177b2b77a04SHong Zhang 17897504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 17997504071SAmlan Barua Input parameter: 1803564f093SHong Zhang A - the matrix 18197504071SAmlan Barua x - the vector on which FDFT will be performed 18297504071SAmlan Barua 18397504071SAmlan Barua Output parameter: 18497504071SAmlan Barua y - vector that stores result of FDFT 18597504071SAmlan Barua */ 186b2b77a04SHong Zhang #undef __FUNCT__ 187b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 188b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 189b2b77a04SHong Zhang { 190b2b77a04SHong Zhang PetscErrorCode ierr; 191b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 192b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 193b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 194c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 195ce94432eSBarry Smith MPI_Comm comm; 196b2b77a04SHong Zhang 197b2b77a04SHong Zhang PetscFunctionBegin; 198ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 199b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 200b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 201b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 202b2b77a04SHong Zhang switch (ndim) { 203b2b77a04SHong Zhang case 1: 2043c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 205b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206ae0a50aaSHong Zhang #else 2074f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2083c3a9c75SAmlan Barua #endif 209b2b77a04SHong Zhang break; 210b2b77a04SHong Zhang case 2: 21197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 212b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2133c3a9c75SAmlan Barua #else 2143c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2153c3a9c75SAmlan Barua #endif 216b2b77a04SHong Zhang break; 217b2b77a04SHong Zhang case 3: 2183c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 219b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2203c3a9c75SAmlan Barua #else 22151d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2223c3a9c75SAmlan Barua #endif 223b2b77a04SHong Zhang break; 224b2b77a04SHong Zhang default: 2253c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 226c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2273c3a9c75SAmlan Barua #else 2283c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2293c3a9c75SAmlan Barua #endif 230b2b77a04SHong Zhang break; 231b2b77a04SHong Zhang } 232b2b77a04SHong Zhang fftw->finarray = x_array; 233b2b77a04SHong Zhang fftw->foutarray = y_array; 234b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 235b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 236b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 237b2b77a04SHong Zhang } else { /* use existing plan */ 238b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 239b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 240b2b77a04SHong Zhang } else { 241b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 242b2b77a04SHong Zhang } 243b2b77a04SHong Zhang } 244b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 245b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 246b2b77a04SHong Zhang PetscFunctionReturn(0); 247b2b77a04SHong Zhang } 248b2b77a04SHong Zhang 24997504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 25097504071SAmlan Barua Input parameter: 2513564f093SHong Zhang A - the matrix 25297504071SAmlan Barua x - the vector on which BDFT will be performed 25397504071SAmlan Barua 25497504071SAmlan Barua Output parameter: 25597504071SAmlan Barua y - vector that stores result of BDFT 25697504071SAmlan Barua */ 257b2b77a04SHong Zhang #undef __FUNCT__ 258b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 259b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 260b2b77a04SHong Zhang { 261b2b77a04SHong Zhang PetscErrorCode ierr; 262b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 263b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 264b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 265c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 266ce94432eSBarry Smith MPI_Comm comm; 267b2b77a04SHong Zhang 268b2b77a04SHong Zhang PetscFunctionBegin; 269ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 270b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 271b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 272b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 273b2b77a04SHong Zhang switch (ndim) { 274b2b77a04SHong Zhang case 1: 2753c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 276b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 277ae0a50aaSHong Zhang #else 2784f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2793c3a9c75SAmlan Barua #endif 280b2b77a04SHong Zhang break; 281b2b77a04SHong Zhang case 2: 28297504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 283b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2843c3a9c75SAmlan Barua #else 2853c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2863c3a9c75SAmlan Barua #endif 287b2b77a04SHong Zhang break; 288b2b77a04SHong Zhang case 3: 2893c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 290b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2913c3a9c75SAmlan Barua #else 2923c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 2933c3a9c75SAmlan Barua #endif 294b2b77a04SHong Zhang break; 295b2b77a04SHong Zhang default: 2963c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 297c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 2983c3a9c75SAmlan Barua #else 2993c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3003c3a9c75SAmlan Barua #endif 301b2b77a04SHong Zhang break; 302b2b77a04SHong Zhang } 303b2b77a04SHong Zhang fftw->binarray = x_array; 304b2b77a04SHong Zhang fftw->boutarray = y_array; 305b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 306b2b77a04SHong Zhang } else { /* use existing plan */ 307b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 308b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 309b2b77a04SHong Zhang } else { 310b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 311b2b77a04SHong Zhang } 312b2b77a04SHong Zhang } 313b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 314b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 315b2b77a04SHong Zhang PetscFunctionReturn(0); 316b2b77a04SHong Zhang } 317b2b77a04SHong Zhang 318b2b77a04SHong Zhang #undef __FUNCT__ 319b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 320b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 321b2b77a04SHong Zhang { 322b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 323b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 324b2b77a04SHong Zhang PetscErrorCode ierr; 325b2b77a04SHong Zhang 326b2b77a04SHong Zhang PetscFunctionBegin; 327b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 328b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 329b1301b2eSHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 330bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3316ccf0b3eSHong Zhang fftw_mpi_cleanup(); 332b2b77a04SHong Zhang PetscFunctionReturn(0); 333b2b77a04SHong Zhang } 334b2b77a04SHong Zhang 335c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 336b2b77a04SHong Zhang #undef __FUNCT__ 337b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 338b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 339b2b77a04SHong Zhang { 340b2b77a04SHong Zhang PetscErrorCode ierr; 341b2b77a04SHong Zhang PetscScalar *array; 342b2b77a04SHong Zhang 343b2b77a04SHong Zhang PetscFunctionBegin; 344b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 345b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 346b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 347b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 348b2b77a04SHong Zhang PetscFunctionReturn(0); 349b2b77a04SHong Zhang } 350b2b77a04SHong Zhang 3514f7415efSAmlan Barua #undef __FUNCT__ 3524be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3534be45526SHong Zhang /*@ 354b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3554f7415efSAmlan Barua parallel layout determined by FFTW 3564f7415efSAmlan Barua 3574f7415efSAmlan Barua Collective on Mat 3584f7415efSAmlan Barua 3594f7415efSAmlan Barua Input Parameter: 3603564f093SHong Zhang . A - the matrix 3614f7415efSAmlan Barua 3624f7415efSAmlan Barua Output Parameter: 363cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 364cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 365cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3664f7415efSAmlan Barua 3674f7415efSAmlan Barua Level: advanced 3683564f093SHong Zhang 36997504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 37097504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 37197504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 37297504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 37397504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 37497504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 375e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 376e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 377e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 378e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 379e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 380e0ec536eSAmlan Barua each processor and returns that. 3814f7415efSAmlan Barua 3824f7415efSAmlan Barua .seealso: MatCreateFFTW() 3834be45526SHong Zhang @*/ 3844be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 3854be45526SHong Zhang { 3864be45526SHong Zhang PetscErrorCode ierr; 387b4c3921fSHong Zhang 3884be45526SHong Zhang PetscFunctionBegin; 3894be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 3904be45526SHong Zhang PetscFunctionReturn(0); 3914be45526SHong Zhang } 3924be45526SHong Zhang 3934be45526SHong Zhang #undef __FUNCT__ 3944be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 3954be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 3964f7415efSAmlan Barua { 3974f7415efSAmlan Barua PetscErrorCode ierr; 3984f7415efSAmlan Barua PetscMPIInt size,rank; 399ce94432eSBarry Smith MPI_Comm comm; 4004f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4014f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4029496c9aeSAmlan Barua PetscInt N = fft->N; 4034f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4044f7415efSAmlan Barua 4054f7415efSAmlan Barua PetscFunctionBegin; 4064f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4074f7415efSAmlan Barua PetscValidType(A,1); 408ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4094f7415efSAmlan Barua 4104f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4114f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4124f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4134f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4144f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4154f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4164f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4174f7415efSAmlan Barua #else 4188ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4198ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4208ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4214f7415efSAmlan Barua #endif 42297504071SAmlan Barua } else { /* parallel cases */ 4234f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4249496c9aeSAmlan Barua ptrdiff_t local_n1; 4259496c9aeSAmlan Barua fftw_complex *data_fout; 4269496c9aeSAmlan Barua ptrdiff_t local_1_start; 4279496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4289496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4299496c9aeSAmlan Barua #else 4304f7415efSAmlan Barua double *data_finr,*data_boutr; 4316ccf0b3eSHong Zhang PetscInt n1,N1; 4329496c9aeSAmlan Barua ptrdiff_t temp; 4339496c9aeSAmlan Barua #endif 4349496c9aeSAmlan Barua 4354f7415efSAmlan Barua switch (ndim) { 4364f7415efSAmlan Barua case 1: 43757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4384f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 43957625b84SAmlan Barua #else 44057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 44157625b84SAmlan Barua if (fin) { 44257625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 443778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 44426fbe8dcSKarl Rupp 44557625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 44657625b84SAmlan Barua } 44757625b84SAmlan Barua if (fout) { 44857625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 449778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 45026fbe8dcSKarl Rupp 45157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 45257625b84SAmlan Barua } 45357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 45457625b84SAmlan Barua if (bout) { 45557625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 456778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 45726fbe8dcSKarl Rupp 45857625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 45957625b84SAmlan Barua } 46057625b84SAmlan Barua break; 46157625b84SAmlan Barua #endif 4624f7415efSAmlan Barua case 2: 46397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4644f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4654f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4664f7415efSAmlan Barua if (fin) { 4674f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 468778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 46926fbe8dcSKarl Rupp 4704f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4714f7415efSAmlan Barua } 4724f7415efSAmlan Barua if (bout) { 4734f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 474778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 47526fbe8dcSKarl Rupp 4764f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 4774f7415efSAmlan Barua } 47857625b84SAmlan Barua if (fout) { 47957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48126fbe8dcSKarl Rupp 48257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48357625b84SAmlan Barua } 4844f7415efSAmlan Barua #else 4854f7415efSAmlan Barua /* Get local size */ 4864f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 4874f7415efSAmlan Barua if (fin) { 4884f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 49026fbe8dcSKarl Rupp 4914f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 4924f7415efSAmlan Barua } 4934f7415efSAmlan Barua if (fout) { 4944f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 495778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 49626fbe8dcSKarl Rupp 4974f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 4984f7415efSAmlan Barua } 4994f7415efSAmlan Barua if (bout) { 5004f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 501778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 50226fbe8dcSKarl Rupp 5034f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5044f7415efSAmlan Barua } 5054f7415efSAmlan Barua #endif 5064f7415efSAmlan Barua break; 5074f7415efSAmlan Barua case 3: 5084f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5094f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5104f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5114f7415efSAmlan Barua if (fin) { 5124f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 513778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 51426fbe8dcSKarl Rupp 5154f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5164f7415efSAmlan Barua } 5174f7415efSAmlan Barua if (bout) { 5184f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 519778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 52026fbe8dcSKarl Rupp 5214f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5224f7415efSAmlan Barua } 5233564f093SHong Zhang 52457625b84SAmlan Barua if (fout) { 52557625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52726fbe8dcSKarl Rupp 52857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52957625b84SAmlan Barua } 5304f7415efSAmlan Barua #else 5310c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5320c9b18e4SAmlan Barua if (fin) { 5330c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 534778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 53526fbe8dcSKarl Rupp 5360c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5370c9b18e4SAmlan Barua } 5380c9b18e4SAmlan Barua if (fout) { 5390c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 540778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 54126fbe8dcSKarl Rupp 5420c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5430c9b18e4SAmlan Barua } 5440c9b18e4SAmlan Barua if (bout) { 5450c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 546778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 54726fbe8dcSKarl Rupp 5480c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5490c9b18e4SAmlan Barua } 5504f7415efSAmlan Barua #endif 5514f7415efSAmlan Barua break; 5524f7415efSAmlan Barua default: 5534f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5544f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 55526fbe8dcSKarl Rupp 5564f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 55726fbe8dcSKarl Rupp 5584f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5594f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 56026fbe8dcSKarl Rupp 5614f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5624f7415efSAmlan Barua 5634f7415efSAmlan Barua if (fin) { 5644f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 565778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 56626fbe8dcSKarl Rupp 5674f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5684f7415efSAmlan Barua } 5694f7415efSAmlan Barua if (bout) { 5704f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 571778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 57226fbe8dcSKarl Rupp 5739496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5744f7415efSAmlan Barua } 5753564f093SHong Zhang 57657625b84SAmlan Barua if (fout) { 57757625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 578778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57926fbe8dcSKarl Rupp 58057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 58157625b84SAmlan Barua } 5824f7415efSAmlan Barua #else 5830c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 5840c9b18e4SAmlan Barua if (fin) { 5850c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 58726fbe8dcSKarl Rupp 5880c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5890c9b18e4SAmlan Barua } 5900c9b18e4SAmlan Barua if (fout) { 5910c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 592778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 59326fbe8dcSKarl Rupp 5940c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5950c9b18e4SAmlan Barua } 5960c9b18e4SAmlan Barua if (bout) { 5970c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 598778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 59926fbe8dcSKarl Rupp 6000c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6010c9b18e4SAmlan Barua } 6024f7415efSAmlan Barua #endif 6034f7415efSAmlan Barua break; 6044f7415efSAmlan Barua } 6054f7415efSAmlan Barua } 6064f7415efSAmlan Barua PetscFunctionReturn(0); 6074f7415efSAmlan Barua } 6084f7415efSAmlan Barua 609c92418ccSAmlan Barua #undef __FUNCT__ 61074a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6113564f093SHong Zhang /*@ 6123564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 61354efbe56SHong Zhang 6143564f093SHong Zhang Collective on Mat 6153564f093SHong Zhang 6163564f093SHong Zhang Input Parameters: 6173564f093SHong Zhang + A - FFTW matrix 6183564f093SHong Zhang - x - the PETSc vector 6193564f093SHong Zhang 6203564f093SHong Zhang Output Parameters: 6213564f093SHong Zhang . y - the FFTW vector 6223564f093SHong Zhang 623b2b77a04SHong Zhang Options Database Keys: 6243564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 625b2b77a04SHong Zhang 626b2b77a04SHong Zhang Level: intermediate 6273564f093SHong Zhang 62897504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 62997504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 63097504071SAmlan Barua zeros. This routine does that job by scattering operation. 63197504071SAmlan Barua 6323564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6333564f093SHong Zhang @*/ 6343564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6353564f093SHong Zhang { 6363564f093SHong Zhang PetscErrorCode ierr; 637b2b77a04SHong Zhang 6383564f093SHong Zhang PetscFunctionBegin; 6393564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6403564f093SHong Zhang PetscFunctionReturn(0); 6413564f093SHong Zhang } 6423c3a9c75SAmlan Barua 6433c3a9c75SAmlan Barua #undef __FUNCT__ 6441986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 64574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6463c3a9c75SAmlan Barua { 6473c3a9c75SAmlan Barua PetscErrorCode ierr; 648ce94432eSBarry Smith MPI_Comm comm; 6493c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6503c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6519496c9aeSAmlan Barua PetscInt N =fft->N; 652b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6539496c9aeSAmlan Barua PetscInt low; 654*7a21806cSHong Zhang PetscMPIInt rank,size; 655*7a21806cSHong Zhang PetscInt vsize,vsize1; 656*7a21806cSHong Zhang //ptrdiff_t alloc_local,local_n0,local_0_start; 657*7a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6589496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6593c3a9c75SAmlan Barua VecScatter vecscat; 6603c3a9c75SAmlan Barua IS list1,list2; 6619496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6629496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6639496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6649496c9aeSAmlan Barua PetscInt N1, n1,NM; 6659496c9aeSAmlan Barua ptrdiff_t temp; 6669496c9aeSAmlan Barua #endif 6673c3a9c75SAmlan Barua 6683564f093SHong Zhang PetscFunctionBegin; 669ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 670f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 671f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6720298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 6733c3a9c75SAmlan Barua 6743564f093SHong Zhang if (size==1) { 6758ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 6768ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 6779496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 6786971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 6796971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6806971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6816971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 682b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 6833564f093SHong Zhang } else { 6843c3a9c75SAmlan Barua switch (ndim) { 6853c3a9c75SAmlan Barua case 1: 68664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 687*7a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 68826fbe8dcSKarl Rupp 6894f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 6904f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 69164657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 69264657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69364657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 69464657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 69564657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 69664657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 69764657d84SAmlan Barua #else 698e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 69964657d84SAmlan Barua #endif 7003c3a9c75SAmlan Barua break; 7013c3a9c75SAmlan Barua case 2: 702bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 703*7a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 70426fbe8dcSKarl Rupp 7054f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7064f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 707bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 708bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 709bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 710bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 711bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 712bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 713bd59e6c6SAmlan Barua #else 7143c3a9c75SAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 71526fbe8dcSKarl Rupp 7163c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7179496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7189496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7193c3a9c75SAmlan Barua 7203564f093SHong Zhang if (dim[1]%2==0) { 7213c3a9c75SAmlan Barua NM = dim[1]+2; 7223564f093SHong Zhang } else { 7233c3a9c75SAmlan Barua NM = dim[1]+1; 7243564f093SHong Zhang } 7253c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7263c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7273c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7283c3a9c75SAmlan Barua tempindx1 = i*NM + j; 72926fbe8dcSKarl Rupp 7305b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7313c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7323c3a9c75SAmlan Barua } 7333c3a9c75SAmlan Barua } 7343c3a9c75SAmlan Barua 7353c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7363c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7373c3a9c75SAmlan Barua 738f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 739f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 741f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 742b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 743b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 744b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 745b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 746bd59e6c6SAmlan Barua #endif 7479496c9aeSAmlan Barua break; 7483c3a9c75SAmlan Barua 7493c3a9c75SAmlan Barua case 3: 750bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 751*7a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 75226fbe8dcSKarl Rupp 7534f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7544f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 755bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 756bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 758bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 759bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 760bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 761bd59e6c6SAmlan Barua #else 762*7a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 76326fbe8dcSKarl Rupp 76451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 76551d1eed7SAmlan Barua 7669496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7679496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 76851d1eed7SAmlan Barua 769db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 770db4deed7SKarl Rupp else NM = dim[2]+1; 77151d1eed7SAmlan Barua 77251d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 77351d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 77451d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 77551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 77651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 77726fbe8dcSKarl Rupp 77851d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 77951d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 78051d1eed7SAmlan Barua } 78151d1eed7SAmlan Barua } 78251d1eed7SAmlan Barua } 78351d1eed7SAmlan Barua 78451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 78551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 78651d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 78751d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78851d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78951d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 790b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 791b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 792b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 793b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua #endif 7959496c9aeSAmlan Barua break; 7963c3a9c75SAmlan Barua 7973c3a9c75SAmlan Barua default: 798bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 799*7a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 80026fbe8dcSKarl Rupp 8014f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8024f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 803bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 804bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 807bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 808bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 809bd59e6c6SAmlan Barua #else 810e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 81126fbe8dcSKarl Rupp 812e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 81326fbe8dcSKarl Rupp 814*7a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 81526fbe8dcSKarl Rupp 816e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 81726fbe8dcSKarl Rupp 818e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 819e5d7f247SAmlan Barua 820e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 821e5d7f247SAmlan Barua 822e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 823e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 824e5d7f247SAmlan Barua 825db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 826db4deed7SKarl Rupp else NM = 1; 827e5d7f247SAmlan Barua 8286971673cSAmlan Barua j = low; 8293564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8306971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8316971673cSAmlan Barua indx2[i] = j; 83226fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8336971673cSAmlan Barua j++; 8346971673cSAmlan Barua } 8356971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8366971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8376971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8386971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8396971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8406971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 841b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 842b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 843b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 844b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 845bd59e6c6SAmlan Barua #endif 8469496c9aeSAmlan Barua break; 8473c3a9c75SAmlan Barua } 848e81bb599SAmlan Barua } 8493564f093SHong Zhang PetscFunctionReturn(0); 8503c3a9c75SAmlan Barua } 8513c3a9c75SAmlan Barua 8523c3a9c75SAmlan Barua #undef __FUNCT__ 85374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8543564f093SHong Zhang /*@ 8553564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8563564f093SHong Zhang 8573564f093SHong Zhang Collective on Mat 8583564f093SHong Zhang 8593564f093SHong Zhang Input Parameters: 8603564f093SHong Zhang + A - FFTW matrix 8613564f093SHong Zhang - x - FFTW vector 8623564f093SHong Zhang 8633564f093SHong Zhang Output Parameters: 8643564f093SHong Zhang . y - PETSc vector 8653564f093SHong Zhang 8663564f093SHong Zhang Level: intermediate 8673564f093SHong Zhang 8683564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8693564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8703564f093SHong Zhang 8713564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8723564f093SHong Zhang @*/ 87374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8743c3a9c75SAmlan Barua { 8753c3a9c75SAmlan Barua PetscErrorCode ierr; 8765fd66863SKarl Rupp 8773c3a9c75SAmlan Barua PetscFunctionBegin; 87874a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 8793c3a9c75SAmlan Barua PetscFunctionReturn(0); 8803c3a9c75SAmlan Barua } 88154efbe56SHong Zhang 8823c3a9c75SAmlan Barua #undef __FUNCT__ 88374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 88474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 8855b3e41ffSAmlan Barua { 8865b3e41ffSAmlan Barua PetscErrorCode ierr; 887ce94432eSBarry Smith MPI_Comm comm; 8885b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 8895b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 8909496c9aeSAmlan Barua PetscInt N = fft->N; 891b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 8929496c9aeSAmlan Barua PetscInt low; 893*7a21806cSHong Zhang PetscMPIInt rank,size; 894*7a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 8959496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 8965b3e41ffSAmlan Barua VecScatter vecscat; 8975b3e41ffSAmlan Barua IS list1,list2; 8989496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 8999496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9009496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 9019496c9aeSAmlan Barua PetscInt N1, n1,NM; 9029496c9aeSAmlan Barua ptrdiff_t temp; 9039496c9aeSAmlan Barua #endif 9049496c9aeSAmlan Barua 9053564f093SHong Zhang PetscFunctionBegin; 906ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9075b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9085b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9090298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9105b3e41ffSAmlan Barua 911e81bb599SAmlan Barua if (size==1) { 912b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9136971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9146971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9156971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9166971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 917b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 918e81bb599SAmlan Barua 9193564f093SHong Zhang } else { 920e81bb599SAmlan Barua switch (ndim) { 921e81bb599SAmlan Barua case 1: 92264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 923*7a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 92426fbe8dcSKarl Rupp 9254f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9264f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 92764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 92864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 92964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 93164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 93264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 93364657d84SAmlan Barua #else 9346a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 93564657d84SAmlan Barua #endif 9365b3e41ffSAmlan Barua break; 9375b3e41ffSAmlan Barua case 2: 938bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 939*7a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 94026fbe8dcSKarl Rupp 9414f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9424f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 943bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 944bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 947bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 948bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 949bd59e6c6SAmlan Barua #else 950*7a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 95126fbe8dcSKarl Rupp 9525b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9535b3e41ffSAmlan Barua 9549496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9559496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9565b3e41ffSAmlan Barua 957db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 958db4deed7SKarl Rupp else NM = dim[1]+1; 9595b3e41ffSAmlan Barua 9605b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9615b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9625b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9635b3e41ffSAmlan Barua tempindx1 = i*NM + j; 96426fbe8dcSKarl Rupp 9655b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9665b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9675b3e41ffSAmlan Barua } 9685b3e41ffSAmlan Barua } 9695b3e41ffSAmlan Barua 9705b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9715b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9725b3e41ffSAmlan Barua 9735b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9745b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9755b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9765b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 978b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 979b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 980b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 981bd59e6c6SAmlan Barua #endif 9829496c9aeSAmlan Barua break; 9835b3e41ffSAmlan Barua case 3: 984bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 985*7a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 98626fbe8dcSKarl Rupp 9874f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 9884f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 989bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 994bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua #else 996bd59e6c6SAmlan Barua 997*7a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 99826fbe8dcSKarl Rupp 99951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 100051d1eed7SAmlan Barua 10019496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10029496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 100351d1eed7SAmlan Barua 1004db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1005db4deed7SKarl Rupp else NM = dim[2]+1; 100651d1eed7SAmlan Barua 100751d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 100851d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 100951d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 101051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 101151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 101226fbe8dcSKarl Rupp 101351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 101451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 101551d1eed7SAmlan Barua } 101651d1eed7SAmlan Barua } 101751d1eed7SAmlan Barua } 101851d1eed7SAmlan Barua 101951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 102051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 102151d1eed7SAmlan Barua 102251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 102351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 102451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 102551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1026b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1027b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1028b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1029b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua #endif 10319496c9aeSAmlan Barua break; 10325b3e41ffSAmlan Barua default: 1033bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1034*7a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 103526fbe8dcSKarl Rupp 10364f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10374f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1038bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1039bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1042bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1043bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1044bd59e6c6SAmlan Barua #else 1045ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 104626fbe8dcSKarl Rupp 1047ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 104826fbe8dcSKarl Rupp 1049ba6e06d0SAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 105026fbe8dcSKarl Rupp 1051ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 105226fbe8dcSKarl Rupp 1053ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1054ba6e06d0SAmlan Barua 1055ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1056ba6e06d0SAmlan Barua 1057ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1058ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1059ba6e06d0SAmlan Barua 1060db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1061db4deed7SKarl Rupp else NM = 1; 1062ba6e06d0SAmlan Barua 1063ba6e06d0SAmlan Barua j = low; 10643564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1065ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1066ba6e06d0SAmlan Barua indx2[i] = j; 10673564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1068ba6e06d0SAmlan Barua j++; 1069ba6e06d0SAmlan Barua } 1070ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1071ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1072ba6e06d0SAmlan Barua 1073ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1074ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1075ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1076ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1077b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1078b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1079b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1080b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1081bd59e6c6SAmlan Barua #endif 10829496c9aeSAmlan Barua break; 10835b3e41ffSAmlan Barua } 1084e81bb599SAmlan Barua } 10853564f093SHong Zhang PetscFunctionReturn(0); 10865b3e41ffSAmlan Barua } 10875b3e41ffSAmlan Barua 10885b3e41ffSAmlan Barua #undef __FUNCT__ 10893c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 10903c3a9c75SAmlan Barua /* 10913564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 10923564f093SHong Zhang 10933c3a9c75SAmlan Barua Options Database Keys: 10943c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 10953c3a9c75SAmlan Barua 10963c3a9c75SAmlan Barua Level: intermediate 10973c3a9c75SAmlan Barua 10983c3a9c75SAmlan Barua */ 10998cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1100b2b77a04SHong Zhang { 1101b2b77a04SHong Zhang PetscErrorCode ierr; 1102ce94432eSBarry Smith MPI_Comm comm; 1103b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1104b2b77a04SHong Zhang Mat_FFTW *fftw; 1105b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11065274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11075274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1108b2b77a04SHong Zhang PetscBool flg; 11094f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 111011902ff2SHong Zhang PetscMPIInt size,rank; 11119496c9aeSAmlan Barua ptrdiff_t *pdim; 11129496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11139496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11149496c9aeSAmlan Barua ptrdiff_t temp; 11158ca4c034SAmlan Barua PetscInt N1,tot_dim; 11164f48bc67SAmlan Barua #else 11174f48bc67SAmlan Barua PetscInt n1; 11189496c9aeSAmlan Barua #endif 11199496c9aeSAmlan Barua 1120b2b77a04SHong Zhang PetscFunctionBegin; 1121ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1122b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 112311902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1124c92418ccSAmlan Barua 11251878d478SAmlan Barua fftw_mpi_init(); 112611902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 112711902ff2SHong Zhang pdim[0] = dim[0]; 11288ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11298ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11308ca4c034SAmlan Barua #endif 11313564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11326882372aSHong Zhang partial_dim *= dim[ctr]; 113311902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11348ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1135db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1136db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11378ca4c034SAmlan Barua #endif 11386882372aSHong Zhang } 11393c3a9c75SAmlan Barua 1140b2b77a04SHong Zhang if (size == 1) { 1141e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1142b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1143b2b77a04SHong Zhang n = N; 1144e81bb599SAmlan Barua #else 1145e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1146e81bb599SAmlan Barua n = tot_dim; 1147e81bb599SAmlan Barua #endif 1148e81bb599SAmlan Barua 1149b2b77a04SHong Zhang } else { 1150*7a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1151b2b77a04SHong Zhang switch (ndim) { 1152b2b77a04SHong Zhang case 1: 11533c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11543c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1155e5d7f247SAmlan Barua #else 1156*7a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 115726fbe8dcSKarl Rupp 11586882372aSHong Zhang n = (PetscInt)local_n0; 11599496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11609496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1161e5d7f247SAmlan Barua #endif 1162b2b77a04SHong Zhang break; 1163b2b77a04SHong Zhang case 2: 11645b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 1165*7a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1166b2b77a04SHong Zhang /* 1167b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11680ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1169b2b77a04SHong Zhang */ 1170b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1171b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11725b3e41ffSAmlan Barua #else 11734f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 117426fbe8dcSKarl Rupp 11755b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1176c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11775b3e41ffSAmlan Barua #endif 1178b2b77a04SHong Zhang break; 1179b2b77a04SHong Zhang case 3: 118051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1181*7a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 118226fbe8dcSKarl Rupp 11836882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11846882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 118551d1eed7SAmlan Barua #else 11864f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 118726fbe8dcSKarl Rupp 118851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1189c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 119051d1eed7SAmlan Barua #endif 1191b2b77a04SHong Zhang break; 1192b2b77a04SHong Zhang default: 1193b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1194*7a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 119526fbe8dcSKarl Rupp 11966882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 11976882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1198b3a17365SAmlan Barua #else 1199b3a17365SAmlan Barua temp = pdim[ndim-1]; 120026fbe8dcSKarl Rupp 1201b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 120226fbe8dcSKarl Rupp 12034f8276c3SHong Zhang alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120426fbe8dcSKarl Rupp 1205b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1206b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 120726fbe8dcSKarl Rupp 1208b3a17365SAmlan Barua pdim[ndim-1] = temp; 120926fbe8dcSKarl Rupp 1210c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1211b3a17365SAmlan Barua #endif 1212b2b77a04SHong Zhang break; 1213b2b77a04SHong Zhang } 1214b2b77a04SHong Zhang } 1215b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1216b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1217b2b77a04SHong Zhang fft->data = (void*)fftw; 1218b2b77a04SHong Zhang 1219b2b77a04SHong Zhang fft->n = n; 12200c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1221e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 122226fbe8dcSKarl Rupp 12235e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 122426fbe8dcSKarl Rupp 1225b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1226c92418ccSAmlan Barua 1227b2b77a04SHong Zhang fftw->p_forward = 0; 1228b2b77a04SHong Zhang fftw->p_backward = 0; 1229b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1230b2b77a04SHong Zhang 1231b2b77a04SHong Zhang if (size == 1) { 1232b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1233b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1234b2b77a04SHong Zhang } else { 1235b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1236b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1237b2b77a04SHong Zhang } 1238b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1239b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12404a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 124126fbe8dcSKarl Rupp 1242bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1243bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1244bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1245b2b77a04SHong Zhang 1246b2b77a04SHong Zhang /* get runtime options */ 1247ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12485274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12495274ed1bSHong Zhang if (flg) { 12505274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12515274ed1bSHong Zhang } 12524a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1253b2b77a04SHong Zhang PetscFunctionReturn(0); 1254b2b77a04SHong Zhang } 12553c3a9c75SAmlan Barua 12563c3a9c75SAmlan Barua 12573c3a9c75SAmlan Barua 12583c3a9c75SAmlan Barua 1259