1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15*a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16*a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18*a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32b2b77a04SHong Zhang 3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3497504071SAmlan Barua Input parameter: 353564f093SHong Zhang A - the matrix 3697504071SAmlan Barua x - the vector on which FDFT will be performed 3797504071SAmlan Barua 3897504071SAmlan Barua Output parameter: 3997504071SAmlan Barua y - vector that stores result of FDFT 4097504071SAmlan Barua */ 41b2b77a04SHong Zhang #undef __FUNCT__ 42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44b2b77a04SHong Zhang { 45b2b77a04SHong Zhang PetscErrorCode ierr; 46b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 47b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 49*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 50*a1b6d50cSHong Zhang fftw_iodim64 *iodims; 51*a1b6d50cSHong Zhang #else 528c1d5ab3SHong Zhang fftw_iodim *iodims; 53*a1b6d50cSHong Zhang #endif 54bb5bf6f6SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim,i; 55b2b77a04SHong Zhang 56b2b77a04SHong Zhang PetscFunctionBegin; 57b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 58b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 59b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 60b2b77a04SHong Zhang switch (ndim) { 61b2b77a04SHong Zhang case 1: 6258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 63b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6458a912c5SAmlan Barua #else 6558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6658a912c5SAmlan Barua #endif 67b2b77a04SHong Zhang break; 68b2b77a04SHong Zhang case 2: 6958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 70b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7158a912c5SAmlan Barua #else 7258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7358a912c5SAmlan Barua #endif 74b2b77a04SHong Zhang break; 75b2b77a04SHong Zhang case 3: 7658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 77b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7858a912c5SAmlan Barua #else 7958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8058a912c5SAmlan Barua #endif 81b2b77a04SHong Zhang break; 82b2b77a04SHong Zhang default: 8358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 84*a1b6d50cSHong Zhang iodims = fftw->iodims; 85*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 868c1d5ab3SHong Zhang if (ndim) { 87*a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 888c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 898c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 90*a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 918c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 928c1d5ab3SHong Zhang } 938c1d5ab3SHong Zhang } 94*a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 95*a1b6d50cSHong Zhang #else 96*a1b6d50cSHong Zhang if (ndim) { 97*a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 98*a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 99*a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 100*a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 101*a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 102*a1b6d50cSHong Zhang } 103*a1b6d50cSHong Zhang } 104*a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 105*a1b6d50cSHong Zhang #endif 106*a1b6d50cSHong Zhang 10758a912c5SAmlan Barua #else 10858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 10958a912c5SAmlan Barua #endif 110b2b77a04SHong Zhang break; 111b2b77a04SHong Zhang } 112b2b77a04SHong Zhang fftw->finarray = x_array; 113b2b77a04SHong Zhang fftw->foutarray = y_array; 114b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 115b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 116b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 117b2b77a04SHong Zhang } else { /* use existing plan */ 118b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 119b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 120b2b77a04SHong Zhang } else { 121b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 122b2b77a04SHong Zhang } 123b2b77a04SHong Zhang } 124b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 125b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 126b2b77a04SHong Zhang PetscFunctionReturn(0); 127b2b77a04SHong Zhang } 128b2b77a04SHong Zhang 12997504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13097504071SAmlan Barua Input parameter: 1313564f093SHong Zhang A - the matrix 13297504071SAmlan Barua x - the vector on which BDFT will be performed 13397504071SAmlan Barua 13497504071SAmlan Barua Output parameter: 13597504071SAmlan Barua y - vector that stores result of BDFT 13697504071SAmlan Barua */ 13797504071SAmlan Barua 138b2b77a04SHong Zhang #undef __FUNCT__ 139b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 140b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 141b2b77a04SHong Zhang { 142b2b77a04SHong Zhang PetscErrorCode ierr; 143b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 144b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 145b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 146b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 147*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 148*a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 149*a1b6d50cSHong Zhang #else 150*a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 151*a1b6d50cSHong Zhang #endif 152b2b77a04SHong Zhang 153b2b77a04SHong Zhang PetscFunctionBegin; 154b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 155b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 156b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 157b2b77a04SHong Zhang switch (ndim) { 158b2b77a04SHong Zhang case 1: 15958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 160b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16158a912c5SAmlan Barua #else 162e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 16358a912c5SAmlan Barua #endif 164b2b77a04SHong Zhang break; 165b2b77a04SHong Zhang case 2: 16658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16858a912c5SAmlan Barua #else 169e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17058a912c5SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 3: 17358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17558a912c5SAmlan Barua #else 176e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17758a912c5SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang default: 18058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 182*a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183*a1b6d50cSHong Zhang #else 1848c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 185*a1b6d50cSHong Zhang #endif 18658a912c5SAmlan Barua #else 187e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18858a912c5SAmlan Barua #endif 189b2b77a04SHong Zhang break; 190b2b77a04SHong Zhang } 191b2b77a04SHong Zhang fftw->binarray = x_array; 192b2b77a04SHong Zhang fftw->boutarray = y_array; 193b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 194b2b77a04SHong Zhang } else { /* use existing plan */ 195b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 196b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 197b2b77a04SHong Zhang } else { 198b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 199b2b77a04SHong Zhang } 200b2b77a04SHong Zhang } 201b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 202b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 203b2b77a04SHong Zhang PetscFunctionReturn(0); 204b2b77a04SHong Zhang } 205b2b77a04SHong Zhang 20697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 20797504071SAmlan Barua Input parameter: 2083564f093SHong Zhang A - the matrix 20997504071SAmlan Barua x - the vector on which FDFT will be performed 21097504071SAmlan Barua 21197504071SAmlan Barua Output parameter: 21297504071SAmlan Barua y - vector that stores result of FDFT 21397504071SAmlan Barua */ 214b2b77a04SHong Zhang #undef __FUNCT__ 215b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 216b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 217b2b77a04SHong Zhang { 218b2b77a04SHong Zhang PetscErrorCode ierr; 219b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 220b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 221b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 222c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 223ce94432eSBarry Smith MPI_Comm comm; 224b2b77a04SHong Zhang 225b2b77a04SHong Zhang PetscFunctionBegin; 226ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 227b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 228b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 229b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 230b2b77a04SHong Zhang switch (ndim) { 231b2b77a04SHong Zhang case 1: 2323c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 233b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 234ae0a50aaSHong Zhang #else 2354f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2363c3a9c75SAmlan Barua #endif 237b2b77a04SHong Zhang break; 238b2b77a04SHong Zhang case 2: 23997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 240b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2413c3a9c75SAmlan Barua #else 2423c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2433c3a9c75SAmlan Barua #endif 244b2b77a04SHong Zhang break; 245b2b77a04SHong Zhang case 3: 2463c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 247b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2483c3a9c75SAmlan Barua #else 24951d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2503c3a9c75SAmlan Barua #endif 251b2b77a04SHong Zhang break; 252b2b77a04SHong Zhang default: 2533c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 254c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2553c3a9c75SAmlan Barua #else 2563c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2573c3a9c75SAmlan Barua #endif 258b2b77a04SHong Zhang break; 259b2b77a04SHong Zhang } 260b2b77a04SHong Zhang fftw->finarray = x_array; 261b2b77a04SHong Zhang fftw->foutarray = y_array; 262b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 263b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 264b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 265b2b77a04SHong Zhang } else { /* use existing plan */ 266b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 267b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 268b2b77a04SHong Zhang } else { 269b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 270b2b77a04SHong Zhang } 271b2b77a04SHong Zhang } 272b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 273b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 274b2b77a04SHong Zhang PetscFunctionReturn(0); 275b2b77a04SHong Zhang } 276b2b77a04SHong Zhang 27797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 27897504071SAmlan Barua Input parameter: 2793564f093SHong Zhang A - the matrix 28097504071SAmlan Barua x - the vector on which BDFT will be performed 28197504071SAmlan Barua 28297504071SAmlan Barua Output parameter: 28397504071SAmlan Barua y - vector that stores result of BDFT 28497504071SAmlan Barua */ 285b2b77a04SHong Zhang #undef __FUNCT__ 286b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 287b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 288b2b77a04SHong Zhang { 289b2b77a04SHong Zhang PetscErrorCode ierr; 290b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 291b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 292b2b77a04SHong Zhang PetscScalar *x_array,*y_array; 293c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 294ce94432eSBarry Smith MPI_Comm comm; 295b2b77a04SHong Zhang 296b2b77a04SHong Zhang PetscFunctionBegin; 297ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 298b2b77a04SHong Zhang ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 299b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 300b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 301b2b77a04SHong Zhang switch (ndim) { 302b2b77a04SHong Zhang case 1: 3033c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 304b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 305ae0a50aaSHong Zhang #else 3064f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3073c3a9c75SAmlan Barua #endif 308b2b77a04SHong Zhang break; 309b2b77a04SHong Zhang case 2: 31097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 311b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3123c3a9c75SAmlan Barua #else 3133c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3143c3a9c75SAmlan Barua #endif 315b2b77a04SHong Zhang break; 316b2b77a04SHong Zhang case 3: 3173c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 318b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3193c3a9c75SAmlan Barua #else 3203c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3213c3a9c75SAmlan Barua #endif 322b2b77a04SHong Zhang break; 323b2b77a04SHong Zhang default: 3243c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 325c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3263c3a9c75SAmlan Barua #else 3273c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3283c3a9c75SAmlan Barua #endif 329b2b77a04SHong Zhang break; 330b2b77a04SHong Zhang } 331b2b77a04SHong Zhang fftw->binarray = x_array; 332b2b77a04SHong Zhang fftw->boutarray = y_array; 333b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 334b2b77a04SHong Zhang } else { /* use existing plan */ 335b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 336b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 337b2b77a04SHong Zhang } else { 338b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 339b2b77a04SHong Zhang } 340b2b77a04SHong Zhang } 341b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 342b2b77a04SHong Zhang ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 343b2b77a04SHong Zhang PetscFunctionReturn(0); 344b2b77a04SHong Zhang } 345b2b77a04SHong Zhang 346b2b77a04SHong Zhang #undef __FUNCT__ 347b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 348b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 349b2b77a04SHong Zhang { 350b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 351b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 352b2b77a04SHong Zhang PetscErrorCode ierr; 353b2b77a04SHong Zhang 354b2b77a04SHong Zhang PetscFunctionBegin; 355b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 356b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3578c1d5ab3SHong Zhang if (fftw->iodims) { 3588c1d5ab3SHong Zhang free(fftw->iodims); 3598c1d5ab3SHong Zhang } 360bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 361bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3626ccf0b3eSHong Zhang fftw_mpi_cleanup(); 363b2b77a04SHong Zhang PetscFunctionReturn(0); 364b2b77a04SHong Zhang } 365b2b77a04SHong Zhang 366c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 367b2b77a04SHong Zhang #undef __FUNCT__ 368b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 369b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 370b2b77a04SHong Zhang { 371b2b77a04SHong Zhang PetscErrorCode ierr; 372b2b77a04SHong Zhang PetscScalar *array; 373b2b77a04SHong Zhang 374b2b77a04SHong Zhang PetscFunctionBegin; 375b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 376b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 377b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 378b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 379b2b77a04SHong Zhang PetscFunctionReturn(0); 380b2b77a04SHong Zhang } 381b2b77a04SHong Zhang 3824f7415efSAmlan Barua #undef __FUNCT__ 3834be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW" 3844be45526SHong Zhang /*@ 385b2aa233eSHong Zhang MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3864f7415efSAmlan Barua parallel layout determined by FFTW 3874f7415efSAmlan Barua 3884f7415efSAmlan Barua Collective on Mat 3894f7415efSAmlan Barua 3904f7415efSAmlan Barua Input Parameter: 3913564f093SHong Zhang . A - the matrix 3924f7415efSAmlan Barua 3934f7415efSAmlan Barua Output Parameter: 394cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 395cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 396cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 3974f7415efSAmlan Barua 3984f7415efSAmlan Barua Level: advanced 3993564f093SHong Zhang 40097504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 40197504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 40297504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 40397504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 40497504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 40597504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 406e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 407e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 408e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 409e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 410e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 411e0ec536eSAmlan Barua each processor and returns that. 4124f7415efSAmlan Barua 4134f7415efSAmlan Barua .seealso: MatCreateFFTW() 4144be45526SHong Zhang @*/ 4154be45526SHong Zhang PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4164be45526SHong Zhang { 4174be45526SHong Zhang PetscErrorCode ierr; 418b4c3921fSHong Zhang 4194be45526SHong Zhang PetscFunctionBegin; 4204be45526SHong Zhang ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4214be45526SHong Zhang PetscFunctionReturn(0); 4224be45526SHong Zhang } 4234be45526SHong Zhang 4244be45526SHong Zhang #undef __FUNCT__ 4254be45526SHong Zhang #define __FUNCT__ "MatGetVecsFFTW_FFTW" 4264be45526SHong Zhang PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4274f7415efSAmlan Barua { 4284f7415efSAmlan Barua PetscErrorCode ierr; 4294f7415efSAmlan Barua PetscMPIInt size,rank; 430ce94432eSBarry Smith MPI_Comm comm; 4314f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4324f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4339496c9aeSAmlan Barua PetscInt N = fft->N; 4344f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4354f7415efSAmlan Barua 4364f7415efSAmlan Barua PetscFunctionBegin; 4374f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4384f7415efSAmlan Barua PetscValidType(A,1); 439ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4404f7415efSAmlan Barua 4414f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4424f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4434f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4444f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4454f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4464f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4474f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4484f7415efSAmlan Barua #else 4498ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4508ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4518ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4524f7415efSAmlan Barua #endif 45397504071SAmlan Barua } else { /* parallel cases */ 4544f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4559496c9aeSAmlan Barua ptrdiff_t local_n1; 4569496c9aeSAmlan Barua fftw_complex *data_fout; 4579496c9aeSAmlan Barua ptrdiff_t local_1_start; 4589496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4599496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4609496c9aeSAmlan Barua #else 4614f7415efSAmlan Barua double *data_finr,*data_boutr; 4626ccf0b3eSHong Zhang PetscInt n1,N1; 4639496c9aeSAmlan Barua ptrdiff_t temp; 4649496c9aeSAmlan Barua #endif 4659496c9aeSAmlan Barua 4664f7415efSAmlan Barua switch (ndim) { 4674f7415efSAmlan Barua case 1: 46857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4694f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 47057625b84SAmlan Barua #else 47157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 47257625b84SAmlan Barua if (fin) { 47357625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 474778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 47526fbe8dcSKarl Rupp 47657625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 47757625b84SAmlan Barua } 47857625b84SAmlan Barua if (fout) { 47957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48126fbe8dcSKarl Rupp 48257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48357625b84SAmlan Barua } 48457625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 48557625b84SAmlan Barua if (bout) { 48657625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 487778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 48826fbe8dcSKarl Rupp 48957625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 49057625b84SAmlan Barua } 49157625b84SAmlan Barua break; 49257625b84SAmlan Barua #endif 4934f7415efSAmlan Barua case 2: 49497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4954f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4964f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4974f7415efSAmlan Barua if (fin) { 4984f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 499778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50026fbe8dcSKarl Rupp 5014f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5024f7415efSAmlan Barua } 5034f7415efSAmlan Barua if (bout) { 5044f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 505778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 50626fbe8dcSKarl Rupp 5074f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5084f7415efSAmlan Barua } 50957625b84SAmlan Barua if (fout) { 51057625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51226fbe8dcSKarl Rupp 51357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51457625b84SAmlan Barua } 5154f7415efSAmlan Barua #else 5164f7415efSAmlan Barua /* Get local size */ 5174f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5184f7415efSAmlan Barua if (fin) { 5194f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52126fbe8dcSKarl Rupp 5224f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5234f7415efSAmlan Barua } 5244f7415efSAmlan Barua if (fout) { 5254f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52726fbe8dcSKarl Rupp 5284f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5294f7415efSAmlan Barua } 5304f7415efSAmlan Barua if (bout) { 5314f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 532778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53326fbe8dcSKarl Rupp 5344f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5354f7415efSAmlan Barua } 5364f7415efSAmlan Barua #endif 5374f7415efSAmlan Barua break; 5384f7415efSAmlan Barua case 3: 5394f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5404f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5414f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5424f7415efSAmlan Barua if (fin) { 5434f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 544778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 54526fbe8dcSKarl Rupp 5464f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5474f7415efSAmlan Barua } 5484f7415efSAmlan Barua if (bout) { 5494f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 550778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 55126fbe8dcSKarl Rupp 5524f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5534f7415efSAmlan Barua } 5543564f093SHong Zhang 55557625b84SAmlan Barua if (fout) { 55657625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55826fbe8dcSKarl Rupp 55957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56057625b84SAmlan Barua } 5614f7415efSAmlan Barua #else 5620c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5630c9b18e4SAmlan Barua if (fin) { 5640c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 56626fbe8dcSKarl Rupp 5670c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5680c9b18e4SAmlan Barua } 5690c9b18e4SAmlan Barua if (fout) { 5700c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 571778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57226fbe8dcSKarl Rupp 5730c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5740c9b18e4SAmlan Barua } 5750c9b18e4SAmlan Barua if (bout) { 5760c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 57826fbe8dcSKarl Rupp 5790c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5800c9b18e4SAmlan Barua } 5814f7415efSAmlan Barua #endif 5824f7415efSAmlan Barua break; 5834f7415efSAmlan Barua default: 5844f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5854f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 58626fbe8dcSKarl Rupp 5874f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 58826fbe8dcSKarl Rupp 5894f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5904f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 59126fbe8dcSKarl Rupp 5924f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5934f7415efSAmlan Barua 5944f7415efSAmlan Barua if (fin) { 5954f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 596778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 59726fbe8dcSKarl Rupp 5984f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5994f7415efSAmlan Barua } 6004f7415efSAmlan Barua if (bout) { 6014f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 602778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 60326fbe8dcSKarl Rupp 6049496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6054f7415efSAmlan Barua } 6063564f093SHong Zhang 60757625b84SAmlan Barua if (fout) { 60857625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 609778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 61026fbe8dcSKarl Rupp 61157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 61257625b84SAmlan Barua } 6134f7415efSAmlan Barua #else 6140c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6150c9b18e4SAmlan Barua if (fin) { 6160c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 617778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 61826fbe8dcSKarl Rupp 6190c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6200c9b18e4SAmlan Barua } 6210c9b18e4SAmlan Barua if (fout) { 6220c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 623778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 62426fbe8dcSKarl Rupp 6250c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6260c9b18e4SAmlan Barua } 6270c9b18e4SAmlan Barua if (bout) { 6280c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 629778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 63026fbe8dcSKarl Rupp 6310c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6320c9b18e4SAmlan Barua } 6334f7415efSAmlan Barua #endif 6344f7415efSAmlan Barua break; 6354f7415efSAmlan Barua } 6364f7415efSAmlan Barua } 6374f7415efSAmlan Barua PetscFunctionReturn(0); 6384f7415efSAmlan Barua } 6394f7415efSAmlan Barua 640c92418ccSAmlan Barua #undef __FUNCT__ 64174a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6423564f093SHong Zhang /*@ 6433564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 64454efbe56SHong Zhang 6453564f093SHong Zhang Collective on Mat 6463564f093SHong Zhang 6473564f093SHong Zhang Input Parameters: 6483564f093SHong Zhang + A - FFTW matrix 6493564f093SHong Zhang - x - the PETSc vector 6503564f093SHong Zhang 6513564f093SHong Zhang Output Parameters: 6523564f093SHong Zhang . y - the FFTW vector 6533564f093SHong Zhang 654b2b77a04SHong Zhang Options Database Keys: 6553564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 656b2b77a04SHong Zhang 657b2b77a04SHong Zhang Level: intermediate 6583564f093SHong Zhang 65997504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 66097504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 66197504071SAmlan Barua zeros. This routine does that job by scattering operation. 66297504071SAmlan Barua 6633564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6643564f093SHong Zhang @*/ 6653564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6663564f093SHong Zhang { 6673564f093SHong Zhang PetscErrorCode ierr; 668b2b77a04SHong Zhang 6693564f093SHong Zhang PetscFunctionBegin; 6703564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6713564f093SHong Zhang PetscFunctionReturn(0); 6723564f093SHong Zhang } 6733c3a9c75SAmlan Barua 6743c3a9c75SAmlan Barua #undef __FUNCT__ 6751986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 67674a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6773c3a9c75SAmlan Barua { 6783c3a9c75SAmlan Barua PetscErrorCode ierr; 679ce94432eSBarry Smith MPI_Comm comm; 6803c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6813c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6829496c9aeSAmlan Barua PetscInt N =fft->N; 683b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6849496c9aeSAmlan Barua PetscInt low; 6857a21806cSHong Zhang PetscMPIInt rank,size; 6867a21806cSHong Zhang PetscInt vsize,vsize1; 6877a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6889496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6893c3a9c75SAmlan Barua VecScatter vecscat; 6903c3a9c75SAmlan Barua IS list1,list2; 6919496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6929496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6939496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 6949496c9aeSAmlan Barua PetscInt N1, n1,NM; 6959496c9aeSAmlan Barua ptrdiff_t temp; 6969496c9aeSAmlan Barua #endif 6973c3a9c75SAmlan Barua 6983564f093SHong Zhang PetscFunctionBegin; 699ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 700f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 701f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7020298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7033c3a9c75SAmlan Barua 7043564f093SHong Zhang if (size==1) { 7058ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7068ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7079496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7086971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7096971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7106971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7116971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 712b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7133564f093SHong Zhang } else { 7143c3a9c75SAmlan Barua switch (ndim) { 7153c3a9c75SAmlan Barua case 1: 71664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7177a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 71826fbe8dcSKarl Rupp 7194f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7204f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 72164657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 72264657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72364657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72464657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 72564657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 72664657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 72764657d84SAmlan Barua #else 728e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 72964657d84SAmlan Barua #endif 7303c3a9c75SAmlan Barua break; 7313c3a9c75SAmlan Barua case 2: 732bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7337a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 73426fbe8dcSKarl Rupp 7354f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7364f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 737bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 738bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 739bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 741bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 742bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 743bd59e6c6SAmlan Barua #else 744c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 74526fbe8dcSKarl Rupp 7463c3a9c75SAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 7479496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 7489496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7493c3a9c75SAmlan Barua 7503564f093SHong Zhang if (dim[1]%2==0) { 7513c3a9c75SAmlan Barua NM = dim[1]+2; 7523564f093SHong Zhang } else { 7533c3a9c75SAmlan Barua NM = dim[1]+1; 7543564f093SHong Zhang } 7553c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7563c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7573c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7583c3a9c75SAmlan Barua tempindx1 = i*NM + j; 75926fbe8dcSKarl Rupp 7605b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7613c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7623c3a9c75SAmlan Barua } 7633c3a9c75SAmlan Barua } 7643c3a9c75SAmlan Barua 7653c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7663c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7673c3a9c75SAmlan Barua 768f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 769f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 772b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 773b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 774b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 775b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 776bd59e6c6SAmlan Barua #endif 7779496c9aeSAmlan Barua break; 7783c3a9c75SAmlan Barua 7793c3a9c75SAmlan Barua case 3: 780bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7817a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 78226fbe8dcSKarl Rupp 7834f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7844f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 785bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 786bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 789bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 790bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 791bd59e6c6SAmlan Barua #else 7927a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 79326fbe8dcSKarl Rupp 79451d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 79551d1eed7SAmlan Barua 7969496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 7979496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 79851d1eed7SAmlan Barua 799db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 800db4deed7SKarl Rupp else NM = dim[2]+1; 80151d1eed7SAmlan Barua 80251d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 80351d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 80451d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 80551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 80651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 80726fbe8dcSKarl Rupp 80851d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 80951d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 81051d1eed7SAmlan Barua } 81151d1eed7SAmlan Barua } 81251d1eed7SAmlan Barua } 81351d1eed7SAmlan Barua 81451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 81551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 81651d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 81751d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81851d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81951d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 820b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 821b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 822b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 823b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 824bd59e6c6SAmlan Barua #endif 8259496c9aeSAmlan Barua break; 8263c3a9c75SAmlan Barua 8273c3a9c75SAmlan Barua default: 828bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8297a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 83026fbe8dcSKarl Rupp 8314f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8324f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 833bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 834bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 837bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 838bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 839bd59e6c6SAmlan Barua #else 840e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 84126fbe8dcSKarl Rupp 842e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 84326fbe8dcSKarl Rupp 8447a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 84526fbe8dcSKarl Rupp 846e5d7f247SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 84726fbe8dcSKarl Rupp 848e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 849e5d7f247SAmlan Barua 850e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 851e5d7f247SAmlan Barua 852e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 853e5d7f247SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 854e5d7f247SAmlan Barua 855db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 856db4deed7SKarl Rupp else NM = 1; 857e5d7f247SAmlan Barua 8586971673cSAmlan Barua j = low; 8593564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8606971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8616971673cSAmlan Barua indx2[i] = j; 86226fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8636971673cSAmlan Barua j++; 8646971673cSAmlan Barua } 8656971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8666971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8676971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8686971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8696971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8706971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 871b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 872b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 873b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 874b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 875bd59e6c6SAmlan Barua #endif 8769496c9aeSAmlan Barua break; 8773c3a9c75SAmlan Barua } 878e81bb599SAmlan Barua } 8793564f093SHong Zhang PetscFunctionReturn(0); 8803c3a9c75SAmlan Barua } 8813c3a9c75SAmlan Barua 8823c3a9c75SAmlan Barua #undef __FUNCT__ 88374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8843564f093SHong Zhang /*@ 8853564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8863564f093SHong Zhang 8873564f093SHong Zhang Collective on Mat 8883564f093SHong Zhang 8893564f093SHong Zhang Input Parameters: 8903564f093SHong Zhang + A - FFTW matrix 8913564f093SHong Zhang - x - FFTW vector 8923564f093SHong Zhang 8933564f093SHong Zhang Output Parameters: 8943564f093SHong Zhang . y - PETSc vector 8953564f093SHong Zhang 8963564f093SHong Zhang Level: intermediate 8973564f093SHong Zhang 8983564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8993564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9003564f093SHong Zhang 9013564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9023564f093SHong Zhang @*/ 90374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9043c3a9c75SAmlan Barua { 9053c3a9c75SAmlan Barua PetscErrorCode ierr; 9065fd66863SKarl Rupp 9073c3a9c75SAmlan Barua PetscFunctionBegin; 90874a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9093c3a9c75SAmlan Barua PetscFunctionReturn(0); 9103c3a9c75SAmlan Barua } 91154efbe56SHong Zhang 9123c3a9c75SAmlan Barua #undef __FUNCT__ 91374a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 91474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9155b3e41ffSAmlan Barua { 9165b3e41ffSAmlan Barua PetscErrorCode ierr; 917ce94432eSBarry Smith MPI_Comm comm; 9185b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9195b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9209496c9aeSAmlan Barua PetscInt N = fft->N; 921b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9229496c9aeSAmlan Barua PetscInt low; 9237a21806cSHong Zhang PetscMPIInt rank,size; 9247a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9259496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9265b3e41ffSAmlan Barua VecScatter vecscat; 9275b3e41ffSAmlan Barua IS list1,list2; 9289496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9299496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9309496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 9319496c9aeSAmlan Barua PetscInt N1, n1,NM; 9329496c9aeSAmlan Barua ptrdiff_t temp; 9339496c9aeSAmlan Barua #endif 9349496c9aeSAmlan Barua 9353564f093SHong Zhang PetscFunctionBegin; 936ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9375b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9385b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9390298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9405b3e41ffSAmlan Barua 941e81bb599SAmlan Barua if (size==1) { 942b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9436971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9446971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9456971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9466971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 947b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 948e81bb599SAmlan Barua 9493564f093SHong Zhang } else { 950e81bb599SAmlan Barua switch (ndim) { 951e81bb599SAmlan Barua case 1: 95264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9537a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 95426fbe8dcSKarl Rupp 9554f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9564f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 95764657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 95864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 96164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 96264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 96364657d84SAmlan Barua #else 9646a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 96564657d84SAmlan Barua #endif 9665b3e41ffSAmlan Barua break; 9675b3e41ffSAmlan Barua case 2: 968bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9697a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 97026fbe8dcSKarl Rupp 9714f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9724f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 973bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 974bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 978bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 979bd59e6c6SAmlan Barua #else 9807a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98126fbe8dcSKarl Rupp 9825b3e41ffSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 9835b3e41ffSAmlan Barua 9849496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 9859496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9865b3e41ffSAmlan Barua 987db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 988db4deed7SKarl Rupp else NM = dim[1]+1; 9895b3e41ffSAmlan Barua 9905b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9915b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9925b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9935b3e41ffSAmlan Barua tempindx1 = i*NM + j; 99426fbe8dcSKarl Rupp 9955b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9965b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9975b3e41ffSAmlan Barua } 9985b3e41ffSAmlan Barua } 9995b3e41ffSAmlan Barua 10005b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10015b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10025b3e41ffSAmlan Barua 10035b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10045b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10055b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10065b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1007b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1008b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1009b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1010b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1011bd59e6c6SAmlan Barua #endif 10129496c9aeSAmlan Barua break; 10135b3e41ffSAmlan Barua case 3: 1014bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10157a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 101626fbe8dcSKarl Rupp 10174f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10184f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1019bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1020bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1023bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1024bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua #else 1026bd59e6c6SAmlan Barua 10277a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 102826fbe8dcSKarl Rupp 102951d1eed7SAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 103051d1eed7SAmlan Barua 10319496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 10329496c9aeSAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 103351d1eed7SAmlan Barua 1034db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1035db4deed7SKarl Rupp else NM = dim[2]+1; 103651d1eed7SAmlan Barua 103751d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 103851d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 103951d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 104051d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 104151d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 104226fbe8dcSKarl Rupp 104351d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 104451d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 104551d1eed7SAmlan Barua } 104651d1eed7SAmlan Barua } 104751d1eed7SAmlan Barua } 104851d1eed7SAmlan Barua 104951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 105051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 105151d1eed7SAmlan Barua 105251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 105351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1059b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1060bd59e6c6SAmlan Barua #endif 10619496c9aeSAmlan Barua break; 10625b3e41ffSAmlan Barua default: 1063bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10647a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 106526fbe8dcSKarl Rupp 10664f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10674f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1068bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1069bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1071bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1072bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1073bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1074bd59e6c6SAmlan Barua #else 1075ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 107626fbe8dcSKarl Rupp 1077ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 107826fbe8dcSKarl Rupp 1079c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 108026fbe8dcSKarl Rupp 1081ba6e06d0SAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 108226fbe8dcSKarl Rupp 1083ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1084ba6e06d0SAmlan Barua 1085ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1086ba6e06d0SAmlan Barua 1087ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1088ba6e06d0SAmlan Barua ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1089ba6e06d0SAmlan Barua 1090db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1091db4deed7SKarl Rupp else NM = 1; 1092ba6e06d0SAmlan Barua 1093ba6e06d0SAmlan Barua j = low; 10943564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1095ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1096ba6e06d0SAmlan Barua indx2[i] = j; 10973564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1098ba6e06d0SAmlan Barua j++; 1099ba6e06d0SAmlan Barua } 1100ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1101ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1102ba6e06d0SAmlan Barua 1103ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1104ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1105ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1107b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1108b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1109b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1110b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1111bd59e6c6SAmlan Barua #endif 11129496c9aeSAmlan Barua break; 11135b3e41ffSAmlan Barua } 1114e81bb599SAmlan Barua } 11153564f093SHong Zhang PetscFunctionReturn(0); 11165b3e41ffSAmlan Barua } 11175b3e41ffSAmlan Barua 11185b3e41ffSAmlan Barua #undef __FUNCT__ 11193c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11203c3a9c75SAmlan Barua /* 11213564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11223564f093SHong Zhang 11233c3a9c75SAmlan Barua Options Database Keys: 11243c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11253c3a9c75SAmlan Barua 11263c3a9c75SAmlan Barua Level: intermediate 11273c3a9c75SAmlan Barua 11283c3a9c75SAmlan Barua */ 11298cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1130b2b77a04SHong Zhang { 1131b2b77a04SHong Zhang PetscErrorCode ierr; 1132ce94432eSBarry Smith MPI_Comm comm; 1133b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1134b2b77a04SHong Zhang Mat_FFTW *fftw; 1135b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11365274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11375274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1138b2b77a04SHong Zhang PetscBool flg; 11394f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 114011902ff2SHong Zhang PetscMPIInt size,rank; 11419496c9aeSAmlan Barua ptrdiff_t *pdim; 11429496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11439496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11449496c9aeSAmlan Barua ptrdiff_t temp; 11458ca4c034SAmlan Barua PetscInt N1,tot_dim; 11464f48bc67SAmlan Barua #else 11474f48bc67SAmlan Barua PetscInt n1; 11489496c9aeSAmlan Barua #endif 11499496c9aeSAmlan Barua 1150b2b77a04SHong Zhang PetscFunctionBegin; 1151ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1152b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 115311902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1154c92418ccSAmlan Barua 11551878d478SAmlan Barua fftw_mpi_init(); 115611902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 115711902ff2SHong Zhang pdim[0] = dim[0]; 11588ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11598ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11608ca4c034SAmlan Barua #endif 11613564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11626882372aSHong Zhang partial_dim *= dim[ctr]; 116311902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11648ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1165db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1166db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11678ca4c034SAmlan Barua #endif 11686882372aSHong Zhang } 11693c3a9c75SAmlan Barua 1170b2b77a04SHong Zhang if (size == 1) { 1171e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1172b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1173b2b77a04SHong Zhang n = N; 1174e81bb599SAmlan Barua #else 1175e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1176e81bb599SAmlan Barua n = tot_dim; 1177e81bb599SAmlan Barua #endif 1178e81bb599SAmlan Barua 1179b2b77a04SHong Zhang } else { 11807a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1181b2b77a04SHong Zhang switch (ndim) { 1182b2b77a04SHong Zhang case 1: 11833c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11843c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1185e5d7f247SAmlan Barua #else 11867a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 118726fbe8dcSKarl Rupp 11886882372aSHong Zhang n = (PetscInt)local_n0; 11899496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11909496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1191e5d7f247SAmlan Barua #endif 1192b2b77a04SHong Zhang break; 1193b2b77a04SHong Zhang case 2: 11945b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 11957a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1196b2b77a04SHong Zhang /* 1197b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11980ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1199b2b77a04SHong Zhang */ 1200b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1201b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12025b3e41ffSAmlan Barua #else 1203c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120426fbe8dcSKarl Rupp 12055b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1206c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12075b3e41ffSAmlan Barua #endif 1208b2b77a04SHong Zhang break; 1209b2b77a04SHong Zhang case 3: 121051d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12117a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 121226fbe8dcSKarl Rupp 12136882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12146882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 121551d1eed7SAmlan Barua #else 1216c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 121726fbe8dcSKarl Rupp 121851d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1219c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 122051d1eed7SAmlan Barua #endif 1221b2b77a04SHong Zhang break; 1222b2b77a04SHong Zhang default: 1223b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12247a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 122526fbe8dcSKarl Rupp 12266882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12276882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1228b3a17365SAmlan Barua #else 1229b3a17365SAmlan Barua temp = pdim[ndim-1]; 123026fbe8dcSKarl Rupp 1231b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 123226fbe8dcSKarl Rupp 1233c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 123426fbe8dcSKarl Rupp 1235b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1236b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 123726fbe8dcSKarl Rupp 1238b3a17365SAmlan Barua pdim[ndim-1] = temp; 123926fbe8dcSKarl Rupp 1240c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1241b3a17365SAmlan Barua #endif 1242b2b77a04SHong Zhang break; 1243b2b77a04SHong Zhang } 1244b2b77a04SHong Zhang } 1245b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1246b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1247b2b77a04SHong Zhang fft->data = (void*)fftw; 1248b2b77a04SHong Zhang 1249b2b77a04SHong Zhang fft->n = n; 12500c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1251e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 125226fbe8dcSKarl Rupp 12535e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12548c1d5ab3SHong Zhang if (size == 1) { 1255*a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1256*a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1257*a1b6d50cSHong Zhang #else 12588c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1259*a1b6d50cSHong Zhang #endif 12608c1d5ab3SHong Zhang } 126126fbe8dcSKarl Rupp 1262b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1263c92418ccSAmlan Barua 1264b2b77a04SHong Zhang fftw->p_forward = 0; 1265b2b77a04SHong Zhang fftw->p_backward = 0; 1266b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1267b2b77a04SHong Zhang 1268b2b77a04SHong Zhang if (size == 1) { 1269b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1270b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1271b2b77a04SHong Zhang } else { 1272b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1273b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1274b2b77a04SHong Zhang } 1275b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1276b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12774a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 127826fbe8dcSKarl Rupp 1279bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1280bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1281bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1282b2b77a04SHong Zhang 1283b2b77a04SHong Zhang /* get runtime options */ 1284ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12855274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12865274ed1bSHong Zhang if (flg) { 12875274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12885274ed1bSHong Zhang } 12894a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1290b2b77a04SHong Zhang PetscFunctionReturn(0); 1291b2b77a04SHong Zhang } 12923c3a9c75SAmlan Barua 12933c3a9c75SAmlan Barua 12943c3a9c75SAmlan Barua 12953c3a9c75SAmlan Barua 1296