1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10c6db04a5SJed Brown #include <fftw3-mpi.h> 11*0cf2e031SBarry Smith #else 12*0cf2e031SBarry Smith #include <fftw3.h> 13*0cf2e031SBarry Smith #endif 14b2b77a04SHong Zhang EXTERN_C_END 15b2b77a04SHong Zhang 16b2b77a04SHong Zhang typedef struct { 17b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 19a1b6d50cSHong Zhang fftw_iodim64 *iodims; 20a1b6d50cSHong Zhang #else 218c1d5ab3SHong Zhang fftw_iodim *iodims; 22a1b6d50cSHong Zhang #endif 23e5d7f247SAmlan Barua PetscInt partial_dim; 24b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 27b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 28b2b77a04SHong Zhang } Mat_FFTW; 29b2b77a04SHong Zhang 30b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 31b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 32*0cf2e031SBarry Smith extern PetscErrorCode MatDestroy_FFTW(Mat); 33*0cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 34*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 35b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 36b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 37b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38*0cf2e031SBarry Smith #endif 39b2b77a04SHong Zhang 40*0cf2e031SBarry Smith /* 41*0cf2e031SBarry Smith MatMult_SeqFFTW performs forward DFT 4297504071SAmlan Barua Input parameter: 433564f093SHong Zhang A - the matrix 4497504071SAmlan Barua x - the vector on which FDFT will be performed 4597504071SAmlan Barua 4697504071SAmlan Barua Output parameter: 4797504071SAmlan Barua y - vector that stores result of FDFT 4897504071SAmlan Barua */ 49b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 50b2b77a04SHong Zhang { 51b2b77a04SHong Zhang PetscErrorCode ierr; 52b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 53b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 54f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 55f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 57a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 58a1b6d50cSHong Zhang fftw_iodim64 *iodims; 59a1b6d50cSHong Zhang #else 608c1d5ab3SHong Zhang fftw_iodim *iodims; 61a1b6d50cSHong Zhang #endif 621acd80e4SHong Zhang PetscInt i; 631acd80e4SHong Zhang #endif 641acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 65b2b77a04SHong Zhang 66b2b77a04SHong Zhang PetscFunctionBegin; 67f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 68b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 69b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 70b2b77a04SHong Zhang switch (ndim) { 71b2b77a04SHong Zhang case 1: 7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 73b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7458a912c5SAmlan Barua #else 7558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7658a912c5SAmlan Barua #endif 77b2b77a04SHong Zhang break; 78b2b77a04SHong Zhang case 2: 7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 80b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8158a912c5SAmlan Barua #else 8258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8358a912c5SAmlan Barua #endif 84b2b77a04SHong Zhang break; 85b2b77a04SHong Zhang case 3: 8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8858a912c5SAmlan Barua #else 8958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 9058a912c5SAmlan Barua #endif 91b2b77a04SHong Zhang break; 92b2b77a04SHong Zhang default: 9358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 94a1b6d50cSHong Zhang iodims = fftw->iodims; 95a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 968c1d5ab3SHong Zhang if (ndim) { 97a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 988c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 998c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 100a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 1018c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 1028c1d5ab3SHong Zhang } 1038c1d5ab3SHong Zhang } 104a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 105a1b6d50cSHong Zhang #else 106a1b6d50cSHong Zhang if (ndim) { 107a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 108a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 109a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 110a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 111a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 112a1b6d50cSHong Zhang } 113a1b6d50cSHong Zhang } 114a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 115a1b6d50cSHong Zhang #endif 116a1b6d50cSHong Zhang 11758a912c5SAmlan Barua #else 118a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11958a912c5SAmlan Barua #endif 120b2b77a04SHong Zhang break; 121b2b77a04SHong Zhang } 122f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 123b2b77a04SHong Zhang fftw->foutarray = y_array; 124b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 125b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 126b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 127b2b77a04SHong Zhang } else { /* use existing plan */ 128b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1297e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 130b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 1317e4bc134SDominic Meiser #else 1327e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 1337e4bc134SDominic Meiser #endif 134b2b77a04SHong Zhang } else { 135b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 136b2b77a04SHong Zhang } 137b2b77a04SHong Zhang } 138b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 139f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 140b2b77a04SHong Zhang PetscFunctionReturn(0); 141b2b77a04SHong Zhang } 142b2b77a04SHong Zhang 14397504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 14497504071SAmlan Barua Input parameter: 1453564f093SHong Zhang A - the matrix 14697504071SAmlan Barua x - the vector on which BDFT will be performed 14797504071SAmlan Barua 14897504071SAmlan Barua Output parameter: 14997504071SAmlan Barua y - vector that stores result of BDFT 15097504071SAmlan Barua */ 15197504071SAmlan Barua 152b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 153b2b77a04SHong Zhang { 154b2b77a04SHong Zhang PetscErrorCode ierr; 155b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 156b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 158f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 159b2b77a04SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 1601acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 161a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 162a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims; 163a1b6d50cSHong Zhang #else 164a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims; 165a1b6d50cSHong Zhang #endif 1661acd80e4SHong Zhang #endif 167b2b77a04SHong Zhang 168b2b77a04SHong Zhang PetscFunctionBegin; 169f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 170b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 171b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 172b2b77a04SHong Zhang switch (ndim) { 173b2b77a04SHong Zhang case 1: 17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 175b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17658a912c5SAmlan Barua #else 177e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17858a912c5SAmlan Barua #endif 179b2b77a04SHong Zhang break; 180b2b77a04SHong Zhang case 2: 18158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 182b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18358a912c5SAmlan Barua #else 184e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18558a912c5SAmlan Barua #endif 186b2b77a04SHong Zhang break; 187b2b77a04SHong Zhang case 3: 18858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 189b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 19058a912c5SAmlan Barua #else 191e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19258a912c5SAmlan Barua #endif 193b2b77a04SHong Zhang break; 194b2b77a04SHong Zhang default: 19558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 196a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 197a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 198a1b6d50cSHong Zhang #else 1998c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 200a1b6d50cSHong Zhang #endif 20158a912c5SAmlan Barua #else 202a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 20358a912c5SAmlan Barua #endif 204b2b77a04SHong Zhang break; 205b2b77a04SHong Zhang } 206f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 207b2b77a04SHong Zhang fftw->boutarray = y_array; 2082f613bf5SBarry Smith fftw_execute(fftw->p_backward); 209b2b77a04SHong Zhang } else { /* use existing plan */ 210b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2117e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 212b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 2137e4bc134SDominic Meiser #else 2147e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 2157e4bc134SDominic Meiser #endif 216b2b77a04SHong Zhang } else { 2172f613bf5SBarry Smith fftw_execute(fftw->p_backward); 218b2b77a04SHong Zhang } 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 221f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 222b2b77a04SHong Zhang PetscFunctionReturn(0); 223b2b77a04SHong Zhang } 224b2b77a04SHong Zhang 225*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 22697504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22797504071SAmlan Barua Input parameter: 2283564f093SHong Zhang A - the matrix 22997504071SAmlan Barua x - the vector on which FDFT will be performed 23097504071SAmlan Barua 23197504071SAmlan Barua Output parameter: 23297504071SAmlan Barua y - vector that stores result of FDFT 23397504071SAmlan Barua */ 234b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 235b2b77a04SHong Zhang { 236b2b77a04SHong Zhang PetscErrorCode ierr; 237b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 238b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 239f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 240f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 241c92418ccSAmlan Barua PetscInt ndim = fft->ndim,*dim = fft->dim; 242ce94432eSBarry Smith MPI_Comm comm; 243b2b77a04SHong Zhang 244b2b77a04SHong Zhang PetscFunctionBegin; 245ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 246f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 247b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 248b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 249b2b77a04SHong Zhang switch (ndim) { 250b2b77a04SHong Zhang case 1: 2513c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 252b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 253ae0a50aaSHong Zhang #else 2544f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2553c3a9c75SAmlan Barua #endif 256b2b77a04SHong Zhang break; 257b2b77a04SHong Zhang case 2: 25897504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 259b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2603c3a9c75SAmlan Barua #else 2613c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2623c3a9c75SAmlan Barua #endif 263b2b77a04SHong Zhang break; 264b2b77a04SHong Zhang case 3: 2653c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 266b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2673c3a9c75SAmlan Barua #else 26851d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2693c3a9c75SAmlan Barua #endif 270b2b77a04SHong Zhang break; 271b2b77a04SHong Zhang default: 2723c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 273c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2743c3a9c75SAmlan Barua #else 2753c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2763c3a9c75SAmlan Barua #endif 277b2b77a04SHong Zhang break; 278b2b77a04SHong Zhang } 279f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 280b2b77a04SHong Zhang fftw->foutarray = y_array; 281b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 282b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 283b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 284b2b77a04SHong Zhang } else { /* use existing plan */ 285b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 286b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 287b2b77a04SHong Zhang } else { 288b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 289b2b77a04SHong Zhang } 290b2b77a04SHong Zhang } 291b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 292f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 293b2b77a04SHong Zhang PetscFunctionReturn(0); 294b2b77a04SHong Zhang } 295b2b77a04SHong Zhang 296*0cf2e031SBarry Smith /* 297*0cf2e031SBarry Smith MatMultTranspose_MPIFFTW performs parallel backward DFT 29897504071SAmlan Barua Input parameter: 2993564f093SHong Zhang A - the matrix 30097504071SAmlan Barua x - the vector on which BDFT will be performed 30197504071SAmlan Barua 30297504071SAmlan Barua Output parameter: 30397504071SAmlan Barua y - vector that stores result of BDFT 30497504071SAmlan Barua */ 305b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 306b2b77a04SHong Zhang { 307b2b77a04SHong Zhang PetscErrorCode ierr; 308b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 309b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 310f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 311f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 312c92418ccSAmlan Barua PetscInt ndim = fft->ndim,*dim = fft->dim; 313ce94432eSBarry Smith MPI_Comm comm; 314b2b77a04SHong Zhang 315b2b77a04SHong Zhang PetscFunctionBegin; 316ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 317f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 318b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 319b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 320b2b77a04SHong Zhang switch (ndim) { 321b2b77a04SHong Zhang case 1: 3223c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 323b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 324ae0a50aaSHong Zhang #else 3254f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3263c3a9c75SAmlan Barua #endif 327b2b77a04SHong Zhang break; 328b2b77a04SHong Zhang case 2: 32997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 330b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3313c3a9c75SAmlan Barua #else 3323c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3333c3a9c75SAmlan Barua #endif 334b2b77a04SHong Zhang break; 335b2b77a04SHong Zhang case 3: 3363c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 337b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3383c3a9c75SAmlan Barua #else 3393c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3403c3a9c75SAmlan Barua #endif 341b2b77a04SHong Zhang break; 342b2b77a04SHong Zhang default: 3433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 344c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3453c3a9c75SAmlan Barua #else 3463c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3473c3a9c75SAmlan Barua #endif 348b2b77a04SHong Zhang break; 349b2b77a04SHong Zhang } 350f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 351b2b77a04SHong Zhang fftw->boutarray = y_array; 3522f613bf5SBarry Smith fftw_execute(fftw->p_backward); 353b2b77a04SHong Zhang } else { /* use existing plan */ 354b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 355b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 356b2b77a04SHong Zhang } else { 3572f613bf5SBarry Smith fftw_execute(fftw->p_backward); 358b2b77a04SHong Zhang } 359b2b77a04SHong Zhang } 360b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 361f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 362b2b77a04SHong Zhang PetscFunctionReturn(0); 363b2b77a04SHong Zhang } 364*0cf2e031SBarry Smith #endif 365b2b77a04SHong Zhang 366b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 367b2b77a04SHong Zhang { 368b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 369b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 370b2b77a04SHong Zhang PetscErrorCode ierr; 371b2b77a04SHong Zhang 372b2b77a04SHong Zhang PetscFunctionBegin; 373b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 374b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3758c1d5ab3SHong Zhang if (fftw->iodims) { 3768c1d5ab3SHong Zhang free(fftw->iodims); 3778c1d5ab3SHong Zhang } 378bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 379bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 380*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 3816ccf0b3eSHong Zhang fftw_mpi_cleanup(); 382*0cf2e031SBarry Smith #endif 383b2b77a04SHong Zhang PetscFunctionReturn(0); 384b2b77a04SHong Zhang } 385b2b77a04SHong Zhang 386*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 387c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 388b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 389b2b77a04SHong Zhang { 390b2b77a04SHong Zhang PetscErrorCode ierr; 391b2b77a04SHong Zhang PetscScalar *array; 392b2b77a04SHong Zhang 393b2b77a04SHong Zhang PetscFunctionBegin; 394b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 3952f613bf5SBarry Smith fftw_free((fftw_complex*)array); 396b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 397b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 398b2b77a04SHong Zhang PetscFunctionReturn(0); 399b2b77a04SHong Zhang } 400*0cf2e031SBarry Smith #endif 401b2b77a04SHong Zhang 402*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 4035b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 4045b113e21Ss-sajid-ali { 4055b113e21Ss-sajid-ali PetscErrorCode ierr; 4065b113e21Ss-sajid-ali Mat A; 4075b113e21Ss-sajid-ali 4085b113e21Ss-sajid-ali PetscFunctionBegin; 4095b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 4105b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 4115b113e21Ss-sajid-ali PetscFunctionReturn(0); 4125b113e21Ss-sajid-ali } 4135b113e21Ss-sajid-ali 4145b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 4155b113e21Ss-sajid-ali { 4165b113e21Ss-sajid-ali PetscErrorCode ierr; 4175b113e21Ss-sajid-ali Mat A; 4185b113e21Ss-sajid-ali 4195b113e21Ss-sajid-ali PetscFunctionBegin; 4205b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 4215b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 4225b113e21Ss-sajid-ali PetscFunctionReturn(0); 4235b113e21Ss-sajid-ali } 4245b113e21Ss-sajid-ali 4255b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 4265b113e21Ss-sajid-ali { 4275b113e21Ss-sajid-ali PetscErrorCode ierr; 4285b113e21Ss-sajid-ali Mat A; 4295b113e21Ss-sajid-ali 4305b113e21Ss-sajid-ali PetscFunctionBegin; 4315b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 4325b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 4335b113e21Ss-sajid-ali PetscFunctionReturn(0); 4345b113e21Ss-sajid-ali } 435*0cf2e031SBarry Smith #endif 4365b113e21Ss-sajid-ali 4374be45526SHong Zhang /*@ 4382a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 4394f7415efSAmlan Barua parallel layout determined by FFTW 4404f7415efSAmlan Barua 4414f7415efSAmlan Barua Collective on Mat 4424f7415efSAmlan Barua 4434f7415efSAmlan Barua Input Parameter: 4443564f093SHong Zhang . A - the matrix 4454f7415efSAmlan Barua 446d8d19677SJose E. Roman Output Parameters: 447cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 4486b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW 449cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4504f7415efSAmlan Barua 4514f7415efSAmlan Barua Level: advanced 4523564f093SHong Zhang 45397504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 45497504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 45597504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 45697504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 45797504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 45897504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 459e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 460e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 461e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 462e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 463e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 464e0ec536eSAmlan Barua each processor and returns that. 4654f7415efSAmlan Barua 46675f45d78SBarry Smith .seealso: MatCreateFFT() 4674be45526SHong Zhang @*/ 4682a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4694be45526SHong Zhang { 4704be45526SHong Zhang PetscErrorCode ierr; 471b4c3921fSHong Zhang 4724be45526SHong Zhang PetscFunctionBegin; 473163d334eSBarry Smith ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4744be45526SHong Zhang PetscFunctionReturn(0); 4754be45526SHong Zhang } 4764be45526SHong Zhang 4772a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4784f7415efSAmlan Barua { 4794f7415efSAmlan Barua PetscErrorCode ierr; 4804f7415efSAmlan Barua PetscMPIInt size,rank; 481ce94432eSBarry Smith MPI_Comm comm; 4824f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4834f7415efSAmlan Barua 4844f7415efSAmlan Barua PetscFunctionBegin; 4854f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4864f7415efSAmlan Barua PetscValidType(A,1); 487ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4884f7415efSAmlan Barua 489ffc4695bSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 490ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 4914f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4924f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 493*0cf2e031SBarry Smith if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fin);CHKERRQ(ierr);} 494*0cf2e031SBarry Smith if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fout);CHKERRQ(ierr);} 495*0cf2e031SBarry Smith if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,bout);CHKERRQ(ierr);} 4964f7415efSAmlan Barua #else 497*0cf2e031SBarry Smith if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fin);CHKERRQ(ierr);} 498*0cf2e031SBarry Smith if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fout);CHKERRQ(ierr);} 499*0cf2e031SBarry Smith if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,bout);CHKERRQ(ierr);} 5004f7415efSAmlan Barua #endif 501*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 50297504071SAmlan Barua } else { /* parallel cases */ 503*0cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 504*0cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 5054f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 5069496c9aeSAmlan Barua ptrdiff_t local_n1; 5079496c9aeSAmlan Barua fftw_complex *data_fout; 5089496c9aeSAmlan Barua ptrdiff_t local_1_start; 5099496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 5109496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 5119496c9aeSAmlan Barua #else 5124f7415efSAmlan Barua double *data_finr,*data_boutr; 5136ccf0b3eSHong Zhang PetscInt n1,N1; 5149496c9aeSAmlan Barua ptrdiff_t temp; 5159496c9aeSAmlan Barua #endif 5169496c9aeSAmlan Barua 5174f7415efSAmlan Barua switch (ndim) { 5184f7415efSAmlan Barua case 1: 51957625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5204f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 52157625b84SAmlan Barua #else 52257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 52357625b84SAmlan Barua if (fin) { 52457625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5265b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5275b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 52857625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 52957625b84SAmlan Barua } 53057625b84SAmlan Barua if (fout) { 53157625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 532*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5335b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5345b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 53557625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53657625b84SAmlan Barua } 53757625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 53857625b84SAmlan Barua if (bout) { 53957625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 540*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5415b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5425b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 54357625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 54457625b84SAmlan Barua } 54557625b84SAmlan Barua break; 54657625b84SAmlan Barua #endif 5474f7415efSAmlan Barua case 2: 54897504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5504f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5514f7415efSAmlan Barua if (fin) { 5524f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 553778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5545b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5555b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5564f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5574f7415efSAmlan Barua } 55857625b84SAmlan Barua if (fout) { 55957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5615b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5625b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 56357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56457625b84SAmlan Barua } 5655b113e21Ss-sajid-ali if (bout) { 5665b113e21Ss-sajid-ali data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 5675b113e21Ss-sajid-ali ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5685b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5695b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5705b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5715b113e21Ss-sajid-ali } 5724f7415efSAmlan Barua #else 5734f7415efSAmlan Barua /* Get local size */ 5744f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5754f7415efSAmlan Barua if (fin) { 5764f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5785b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5795b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5804f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5814f7415efSAmlan Barua } 5824f7415efSAmlan Barua if (fout) { 5834f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 584*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5855b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5865b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5874f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5884f7415efSAmlan Barua } 5894f7415efSAmlan Barua if (bout) { 5904f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 591*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5925b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5935b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5944f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5954f7415efSAmlan Barua } 5964f7415efSAmlan Barua #endif 5974f7415efSAmlan Barua break; 5984f7415efSAmlan Barua case 3: 5994f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6004f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6014f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 6024f7415efSAmlan Barua if (fin) { 6034f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 604778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6055b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6065b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6074f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6084f7415efSAmlan Barua } 6095b113e21Ss-sajid-ali if (fout) { 6105b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6115b113e21Ss-sajid-ali ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6125b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6135b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6145b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6155b113e21Ss-sajid-ali } 6164f7415efSAmlan Barua if (bout) { 6174f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 618778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 6195b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6205b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6214f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6224f7415efSAmlan Barua } 6234f7415efSAmlan Barua #else 6240c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 6250c9b18e4SAmlan Barua if (fin) { 6260c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 627*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6285b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6295b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6300c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6310c9b18e4SAmlan Barua } 6320c9b18e4SAmlan Barua if (fout) { 6330c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6355b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6365b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6370c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6380c9b18e4SAmlan Barua } 6390c9b18e4SAmlan Barua if (bout) { 6400c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 641*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6425b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6435b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6440c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6450c9b18e4SAmlan Barua } 6464f7415efSAmlan Barua #endif 6474f7415efSAmlan Barua break; 6484f7415efSAmlan Barua default: 6494f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6504f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 6514f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 6524f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 653*0cf2e031SBarry Smith N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 6544f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 6554f7415efSAmlan Barua 6564f7415efSAmlan Barua if (fin) { 6574f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 658*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6595b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6605b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6614f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6624f7415efSAmlan Barua } 6635b113e21Ss-sajid-ali if (fout) { 6645b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 665*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6665b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6675b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6685b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6695b113e21Ss-sajid-ali } 6704f7415efSAmlan Barua if (bout) { 6714f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 672*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 6735b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6745b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6759496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6764f7415efSAmlan Barua } 6774f7415efSAmlan Barua #else 6780c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6790c9b18e4SAmlan Barua if (fin) { 6800c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 681*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6825b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6835b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6840c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6850c9b18e4SAmlan Barua } 6860c9b18e4SAmlan Barua if (fout) { 6870c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 688*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6895b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6905b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6910c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6920c9b18e4SAmlan Barua } 6930c9b18e4SAmlan Barua if (bout) { 6940c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 695*0cf2e031SBarry Smith ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6965b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6975b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6980c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6990c9b18e4SAmlan Barua } 7004f7415efSAmlan Barua #endif 7014f7415efSAmlan Barua break; 7024f7415efSAmlan Barua } 703f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 704f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 705f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 706f9d91177SJunchao Zhang */ 707f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 708f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 709f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 710*0cf2e031SBarry Smith #endif 7114f7415efSAmlan Barua } 7124f7415efSAmlan Barua PetscFunctionReturn(0); 7134f7415efSAmlan Barua } 7144f7415efSAmlan Barua 7153564f093SHong Zhang /*@ 7163564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 71754efbe56SHong Zhang 7183564f093SHong Zhang Collective on Mat 7193564f093SHong Zhang 7203564f093SHong Zhang Input Parameters: 7213564f093SHong Zhang + A - FFTW matrix 7223564f093SHong Zhang - x - the PETSc vector 7233564f093SHong Zhang 7243564f093SHong Zhang Output Parameters: 7253564f093SHong Zhang . y - the FFTW vector 7263564f093SHong Zhang 727b2b77a04SHong Zhang Options Database Keys: 7283564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 729b2b77a04SHong Zhang 730b2b77a04SHong Zhang Level: intermediate 7313564f093SHong Zhang 73297504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 73397504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 73497504071SAmlan Barua zeros. This routine does that job by scattering operation. 73597504071SAmlan Barua 7363564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 7373564f093SHong Zhang @*/ 7383564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 7393564f093SHong Zhang { 7403564f093SHong Zhang PetscErrorCode ierr; 741b2b77a04SHong Zhang 7423564f093SHong Zhang PetscFunctionBegin; 743163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7443564f093SHong Zhang PetscFunctionReturn(0); 7453564f093SHong Zhang } 7463c3a9c75SAmlan Barua 74774a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 7483c3a9c75SAmlan Barua { 7493c3a9c75SAmlan Barua PetscErrorCode ierr; 750ce94432eSBarry Smith MPI_Comm comm; 7513c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7529496c9aeSAmlan Barua PetscInt low; 7537a21806cSHong Zhang PetscMPIInt rank,size; 7547a21806cSHong Zhang PetscInt vsize,vsize1; 7553c3a9c75SAmlan Barua VecScatter vecscat; 756*0cf2e031SBarry Smith IS list1; 7573c3a9c75SAmlan Barua 7583564f093SHong Zhang PetscFunctionBegin; 759ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 760ffc4695bSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 761ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 7621e1ea65dSPierre Jolivet ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr); 7633c3a9c75SAmlan Barua 7643564f093SHong Zhang if (size==1) { 7658ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7668ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 767*0cf2e031SBarry Smith ierr = ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1);CHKERRQ(ierr); 7689448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7696971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7706971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7716971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 772b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 773*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 7743564f093SHong Zhang } else { 775*0cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 776*0cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 777*0cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start; 778*0cf2e031SBarry Smith ptrdiff_t local_n1,local_1_start; 779*0cf2e031SBarry Smith IS list2; 780*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 781*0cf2e031SBarry Smith PetscInt i,j,k,partial_dim; 782*0cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 783*0cf2e031SBarry Smith PetscInt NM; 784*0cf2e031SBarry Smith ptrdiff_t temp; 785*0cf2e031SBarry Smith #endif 786*0cf2e031SBarry Smith 7873c3a9c75SAmlan Barua switch (ndim) { 7883c3a9c75SAmlan Barua case 1: 78964657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7907a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 79126fbe8dcSKarl Rupp 7921e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);CHKERRQ(ierr); 7931e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0,low,1,&list2);CHKERRQ(ierr); 7949448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 79564657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79664657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 79764657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 79864657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 79964657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 80064657d84SAmlan Barua #else 801e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 80264657d84SAmlan Barua #endif 8033c3a9c75SAmlan Barua break; 8043c3a9c75SAmlan Barua case 2: 805bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8067a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 80726fbe8dcSKarl Rupp 8081e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr); 8091e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr); 8109448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 811bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 812bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 813bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 814bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 815bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 816bd59e6c6SAmlan Barua #else 817c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 81826fbe8dcSKarl Rupp 819854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 820854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 8213c3a9c75SAmlan Barua 8223564f093SHong Zhang if (dim[1]%2==0) { 8233c3a9c75SAmlan Barua NM = dim[1]+2; 8243564f093SHong Zhang } else { 8253c3a9c75SAmlan Barua NM = dim[1]+1; 8263564f093SHong Zhang } 8273c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 8283c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 8293c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 8303c3a9c75SAmlan Barua tempindx1 = i*NM + j; 83126fbe8dcSKarl Rupp 8325b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8333c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 8343c3a9c75SAmlan Barua } 8353c3a9c75SAmlan Barua } 8363c3a9c75SAmlan Barua 8373c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8383c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8393c3a9c75SAmlan Barua 8409448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 841f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 844b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 845b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 846b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 847b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 848bd59e6c6SAmlan Barua #endif 8499496c9aeSAmlan Barua break; 8503c3a9c75SAmlan Barua 8513c3a9c75SAmlan Barua case 3: 852bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8537a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 85426fbe8dcSKarl Rupp 8551e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr); 8561e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr); 8579448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 858bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 861bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 862bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 863bd59e6c6SAmlan Barua #else 864c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 865f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 8667a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 86726fbe8dcSKarl Rupp 868854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 869854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 87051d1eed7SAmlan Barua 871db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 872db4deed7SKarl Rupp else NM = dim[2]+1; 87351d1eed7SAmlan Barua 87451d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 87551d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 87651d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 87751d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 87851d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 87926fbe8dcSKarl Rupp 88051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 88151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 88251d1eed7SAmlan Barua } 88351d1eed7SAmlan Barua } 88451d1eed7SAmlan Barua } 88551d1eed7SAmlan Barua 88651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 88751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8889448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 88951d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 89051d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 89151d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 892b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 893b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 894b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 895b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 896bd59e6c6SAmlan Barua #endif 8979496c9aeSAmlan Barua break; 8983c3a9c75SAmlan Barua 8993c3a9c75SAmlan Barua default: 900bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9017a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 90226fbe8dcSKarl Rupp 9031e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr); 9041e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr); 9059448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 906bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 909bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 910bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 911bd59e6c6SAmlan Barua #else 912c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 913f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 914e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 91526fbe8dcSKarl Rupp 916e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 91726fbe8dcSKarl Rupp 9187a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 91926fbe8dcSKarl Rupp 920e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 921e5d7f247SAmlan Barua 922e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 923e5d7f247SAmlan Barua 924854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 925854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 926e5d7f247SAmlan Barua 927db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 928db4deed7SKarl Rupp else NM = 1; 929e5d7f247SAmlan Barua 9306971673cSAmlan Barua j = low; 9313564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 9326971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 9336971673cSAmlan Barua indx2[i] = j; 93426fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 9356971673cSAmlan Barua j++; 9366971673cSAmlan Barua } 9376971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9386971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9399448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 9406971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9416971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9426971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 943b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 944b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 945b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 946b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 947bd59e6c6SAmlan Barua #endif 9489496c9aeSAmlan Barua break; 9493c3a9c75SAmlan Barua } 950*0cf2e031SBarry Smith #endif 951e81bb599SAmlan Barua } 9523564f093SHong Zhang PetscFunctionReturn(0); 9533c3a9c75SAmlan Barua } 9543c3a9c75SAmlan Barua 9553564f093SHong Zhang /*@ 9563564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 9573564f093SHong Zhang 9583564f093SHong Zhang Collective on Mat 9593564f093SHong Zhang 9603564f093SHong Zhang Input Parameters: 9613564f093SHong Zhang + A - FFTW matrix 9623564f093SHong Zhang - x - FFTW vector 9633564f093SHong Zhang 9643564f093SHong Zhang Output Parameters: 9653564f093SHong Zhang . y - PETSc vector 9663564f093SHong Zhang 9673564f093SHong Zhang Level: intermediate 9683564f093SHong Zhang 9693564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9703564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9713564f093SHong Zhang 9723564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9733564f093SHong Zhang @*/ 97474a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9753c3a9c75SAmlan Barua { 9763c3a9c75SAmlan Barua PetscErrorCode ierr; 9775fd66863SKarl Rupp 9783c3a9c75SAmlan Barua PetscFunctionBegin; 979163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9803c3a9c75SAmlan Barua PetscFunctionReturn(0); 9813c3a9c75SAmlan Barua } 98254efbe56SHong Zhang 98374a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9845b3e41ffSAmlan Barua { 9855b3e41ffSAmlan Barua PetscErrorCode ierr; 986ce94432eSBarry Smith MPI_Comm comm; 9875b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9889496c9aeSAmlan Barua PetscInt low; 9897a21806cSHong Zhang PetscMPIInt rank,size; 9905b3e41ffSAmlan Barua VecScatter vecscat; 991*0cf2e031SBarry Smith IS list1; 9929496c9aeSAmlan Barua 9933564f093SHong Zhang PetscFunctionBegin; 994ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 995ffc4695bSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 996ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 9970298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9985b3e41ffSAmlan Barua 999e81bb599SAmlan Barua if (size==1) { 1000*0cf2e031SBarry Smith ierr = ISCreateStride(comm,fft->N,0,1,&list1);CHKERRQ(ierr); 10019448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 10026971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10036971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10046971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1005b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1006e81bb599SAmlan Barua 1007*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 10083564f093SHong Zhang } else { 1009*0cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1010*0cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 1011*0cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start; 1012*0cf2e031SBarry Smith ptrdiff_t local_n1,local_1_start; 1013*0cf2e031SBarry Smith IS list2; 1014*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 1015*0cf2e031SBarry Smith PetscInt i,j,k,partial_dim; 1016*0cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1; 1017*0cf2e031SBarry Smith PetscInt NM; 1018*0cf2e031SBarry Smith ptrdiff_t temp; 1019*0cf2e031SBarry Smith #endif 1020e81bb599SAmlan Barua switch (ndim) { 1021e81bb599SAmlan Barua case 1: 102264657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10237a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 102426fbe8dcSKarl Rupp 10251e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);CHKERRQ(ierr); 10261e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n1,low,1,&list2);CHKERRQ(ierr); 10279448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 102864657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 102964657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 103064657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 103164657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 103264657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 103364657d84SAmlan Barua #else 10346a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 103564657d84SAmlan Barua #endif 10365b3e41ffSAmlan Barua break; 10375b3e41ffSAmlan Barua case 2: 1038bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10397a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 104026fbe8dcSKarl Rupp 10411e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr); 10421e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr); 10439448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1044bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1045bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1047bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1048bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1049bd59e6c6SAmlan Barua #else 10507a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 105126fbe8dcSKarl Rupp 1052854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1053854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 10545b3e41ffSAmlan Barua 1055db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 1056db4deed7SKarl Rupp else NM = dim[1]+1; 10575b3e41ffSAmlan Barua 10585b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 10595b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 10605b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10615b3e41ffSAmlan Barua tempindx1 = i*NM + j; 106226fbe8dcSKarl Rupp 10635b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10645b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10655b3e41ffSAmlan Barua } 10665b3e41ffSAmlan Barua } 10675b3e41ffSAmlan Barua 10685b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10695b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10705b3e41ffSAmlan Barua 10719448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10725b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10735b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10745b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1078b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1079bd59e6c6SAmlan Barua #endif 10809496c9aeSAmlan Barua break; 10815b3e41ffSAmlan Barua case 3: 1082bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10837a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 108426fbe8dcSKarl Rupp 10851e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr); 10861e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr); 10879448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1088bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1091bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1092bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1093bd59e6c6SAmlan Barua #else 10947a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 109526fbe8dcSKarl Rupp 1096854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1097854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 109851d1eed7SAmlan Barua 1099db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1100db4deed7SKarl Rupp else NM = dim[2]+1; 110151d1eed7SAmlan Barua 110251d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 110351d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 110451d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 110551d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 110651d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 110726fbe8dcSKarl Rupp 110851d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 110951d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 111051d1eed7SAmlan Barua } 111151d1eed7SAmlan Barua } 111251d1eed7SAmlan Barua } 111351d1eed7SAmlan Barua 111451d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 111551d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 111651d1eed7SAmlan Barua 11179448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 111851d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 111951d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 112051d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1121b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1122b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1123b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1124b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1125bd59e6c6SAmlan Barua #endif 11269496c9aeSAmlan Barua break; 11275b3e41ffSAmlan Barua default: 1128bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11297a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 113026fbe8dcSKarl Rupp 11311e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr); 11321e1ea65dSPierre Jolivet ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr); 11339448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1134bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1137bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1138bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1139bd59e6c6SAmlan Barua #else 1140ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 114126fbe8dcSKarl Rupp 1142ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 114326fbe8dcSKarl Rupp 1144c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 114526fbe8dcSKarl Rupp 1146ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1147ba6e06d0SAmlan Barua 1148ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1149ba6e06d0SAmlan Barua 1150854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1151854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1152ba6e06d0SAmlan Barua 1153db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1154db4deed7SKarl Rupp else NM = 1; 1155ba6e06d0SAmlan Barua 1156ba6e06d0SAmlan Barua j = low; 11573564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1158ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1159ba6e06d0SAmlan Barua indx2[i] = j; 11603564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1161ba6e06d0SAmlan Barua j++; 1162ba6e06d0SAmlan Barua } 1163ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1164ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1165ba6e06d0SAmlan Barua 11669448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1167ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1170b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1171b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1172b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1173b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1174bd59e6c6SAmlan Barua #endif 11759496c9aeSAmlan Barua break; 11765b3e41ffSAmlan Barua } 1177*0cf2e031SBarry Smith #endif 1178e81bb599SAmlan Barua } 11793564f093SHong Zhang PetscFunctionReturn(0); 11805b3e41ffSAmlan Barua } 11815b3e41ffSAmlan Barua 11823c3a9c75SAmlan Barua /* 11833564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11843564f093SHong Zhang 11853c3a9c75SAmlan Barua Options Database Keys: 11863c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11873c3a9c75SAmlan Barua 11883c3a9c75SAmlan Barua Level: intermediate 11893c3a9c75SAmlan Barua 11903c3a9c75SAmlan Barua */ 11918cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1192b2b77a04SHong Zhang { 1193b2b77a04SHong Zhang PetscErrorCode ierr; 1194ce94432eSBarry Smith MPI_Comm comm; 1195b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 1196b2b77a04SHong Zhang Mat_FFTW *fftw; 1197*0cf2e031SBarry Smith PetscInt ndim = fft->ndim,*dim = fft->dim; 11985274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11995274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1200b2b77a04SHong Zhang PetscBool flg; 12014f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 120211902ff2SHong Zhang PetscMPIInt size,rank; 12039496c9aeSAmlan Barua ptrdiff_t *pdim; 12049496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1205*0cf2e031SBarry Smith PetscInt tot_dim; 12069496c9aeSAmlan Barua #endif 12079496c9aeSAmlan Barua 1208b2b77a04SHong Zhang PetscFunctionBegin; 1209ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1210ffc4695bSBarry Smith ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1211ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1212c92418ccSAmlan Barua 1213*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 12141878d478SAmlan Barua fftw_mpi_init(); 1215*0cf2e031SBarry Smith #endif 121611902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 121711902ff2SHong Zhang pdim[0] = dim[0]; 12188ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12198ca4c034SAmlan Barua tot_dim = 2*dim[0]; 12208ca4c034SAmlan Barua #endif 12213564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 12226882372aSHong Zhang partial_dim *= dim[ctr]; 122311902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12248ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1225db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1226db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12278ca4c034SAmlan Barua #endif 12286882372aSHong Zhang } 12293c3a9c75SAmlan Barua 1230b2b77a04SHong Zhang if (size == 1) { 1231e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1232*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->N,fft->N,fft->N,fft->N);CHKERRQ(ierr); 1233*0cf2e031SBarry Smith fft->n = fft->N; 1234e81bb599SAmlan Barua #else 1235e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1236*0cf2e031SBarry Smith fft->n = tot_dim; 1237*0cf2e031SBarry Smith #endif 1238*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1239*0cf2e031SBarry Smith } else { 1240*0cf2e031SBarry Smith ptrdiff_t local_n0,local_0_start,local_n1,local_1_start; 1241*0cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX) 1242*0cf2e031SBarry Smith ptrdiff_t temp; 1243*0cf2e031SBarry Smith PetscInt N1; 1244e81bb599SAmlan Barua #endif 1245e81bb599SAmlan Barua 1246b2b77a04SHong Zhang switch (ndim) { 1247b2b77a04SHong Zhang case 1: 12483c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12493c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1250e5d7f247SAmlan Barua #else 12517a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1252*0cf2e031SBarry Smith fft->n = (PetscInt)local_n0; 1253*0cf2e031SBarry Smith ierr = MatSetSizes(A,local_n1,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1254e5d7f247SAmlan Barua #endif 1255b2b77a04SHong Zhang break; 1256b2b77a04SHong Zhang case 2: 12575b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12587a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1259*0cf2e031SBarry Smith fft->n = (PetscInt)local_n0*dim[1]; 1260*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 12615b3e41ffSAmlan Barua #else 1262c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 126326fbe8dcSKarl Rupp 1264*0cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1265*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));CHKERRQ(ierr); 12665b3e41ffSAmlan Barua #endif 1267b2b77a04SHong Zhang break; 1268b2b77a04SHong Zhang case 3: 126951d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12707a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 127126fbe8dcSKarl Rupp 1272*0cf2e031SBarry Smith fft->n = (PetscInt)local_n0*dim[1]*dim[2]; 1273*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 127451d1eed7SAmlan Barua #else 1275c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 127626fbe8dcSKarl Rupp 1277*0cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1278*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));CHKERRQ(ierr); 127951d1eed7SAmlan Barua #endif 1280b2b77a04SHong Zhang break; 1281b2b77a04SHong Zhang default: 1282b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12837a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 128426fbe8dcSKarl Rupp 1285*0cf2e031SBarry Smith fft->n = (PetscInt)local_n0*partial_dim; 1286*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1287b3a17365SAmlan Barua #else 1288b3a17365SAmlan Barua temp = pdim[ndim-1]; 128926fbe8dcSKarl Rupp 1290b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 129126fbe8dcSKarl Rupp 1292c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 129326fbe8dcSKarl Rupp 1294*0cf2e031SBarry Smith fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1295*0cf2e031SBarry Smith N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 129626fbe8dcSKarl Rupp 1297b3a17365SAmlan Barua pdim[ndim-1] = temp; 129826fbe8dcSKarl Rupp 1299*0cf2e031SBarry Smith ierr = MatSetSizes(A,fft->n,fft->n,N1,N1);CHKERRQ(ierr); 1300b3a17365SAmlan Barua #endif 1301b2b77a04SHong Zhang break; 1302b2b77a04SHong Zhang } 1303*0cf2e031SBarry Smith #endif 1304b2b77a04SHong Zhang } 13052277172eSMark Adams free(pdim); 1306b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1307b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1308b2b77a04SHong Zhang fft->data = (void*)fftw; 1309b2b77a04SHong Zhang 13100c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1311e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 131226fbe8dcSKarl Rupp 13135e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 13148c1d5ab3SHong Zhang if (size == 1) { 1315a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1316a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1317a1b6d50cSHong Zhang #else 13188c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1319a1b6d50cSHong Zhang #endif 13208c1d5ab3SHong Zhang } 132126fbe8dcSKarl Rupp 1322b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1323c92418ccSAmlan Barua 1324f4259b30SLisandro Dalcin fftw->p_forward = NULL; 1325f4259b30SLisandro Dalcin fftw->p_backward = NULL; 1326b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1327b2b77a04SHong Zhang 1328b2b77a04SHong Zhang if (size == 1) { 1329b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1330b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1331*0cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI) 1332b2b77a04SHong Zhang } else { 1333b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1334b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1335*0cf2e031SBarry Smith #endif 1336b2b77a04SHong Zhang } 1337b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1338b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13394a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 134026fbe8dcSKarl Rupp 13412a7a6963SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1342bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1343bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1344b2b77a04SHong Zhang 1345b2b77a04SHong Zhang /* get runtime options */ 1346ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 13475274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 13485274ed1bSHong Zhang if (flg) { 13495274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 13505274ed1bSHong Zhang } 13514a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1352b2b77a04SHong Zhang PetscFunctionReturn(0); 1353b2b77a04SHong Zhang } 13543c3a9c75SAmlan Barua 1355