1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4*c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 325b113e21Ss-sajid-ali extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 33b2b77a04SHong Zhang 3497504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3597504071SAmlan Barua Input parameter: 363564f093SHong Zhang A - the matrix 3797504071SAmlan Barua x - the vector on which FDFT will be performed 3897504071SAmlan Barua 3997504071SAmlan Barua Output parameter: 4097504071SAmlan Barua y - vector that stores result of FDFT 4197504071SAmlan Barua */ 42b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 43b2b77a04SHong Zhang { 44b2b77a04SHong Zhang PetscErrorCode ierr; 45b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 46b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 47f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 48f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 491acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 50a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 51a1b6d50cSHong Zhang fftw_iodim64 *iodims; 52a1b6d50cSHong Zhang #else 538c1d5ab3SHong Zhang fftw_iodim *iodims; 54a1b6d50cSHong Zhang #endif 551acd80e4SHong Zhang PetscInt i; 561acd80e4SHong Zhang #endif 571acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 58b2b77a04SHong Zhang 59b2b77a04SHong Zhang PetscFunctionBegin; 60f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 61b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 63b2b77a04SHong Zhang switch (ndim) { 64b2b77a04SHong Zhang case 1: 6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 66b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6758a912c5SAmlan Barua #else 6858a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6958a912c5SAmlan Barua #endif 70b2b77a04SHong Zhang break; 71b2b77a04SHong Zhang case 2: 7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 73b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7458a912c5SAmlan Barua #else 7558a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7658a912c5SAmlan Barua #endif 77b2b77a04SHong Zhang break; 78b2b77a04SHong Zhang case 3: 7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 80b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8158a912c5SAmlan Barua #else 8258a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8358a912c5SAmlan Barua #endif 84b2b77a04SHong Zhang break; 85b2b77a04SHong Zhang default: 8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 87a1b6d50cSHong Zhang iodims = fftw->iodims; 88a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 898c1d5ab3SHong Zhang if (ndim) { 90a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 918c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 928c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 93a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 948c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 958c1d5ab3SHong Zhang } 968c1d5ab3SHong Zhang } 97a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98a1b6d50cSHong Zhang #else 99a1b6d50cSHong Zhang if (ndim) { 100a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 101a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 102a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 103a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 104a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105a1b6d50cSHong Zhang } 106a1b6d50cSHong Zhang } 107a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108a1b6d50cSHong Zhang #endif 109a1b6d50cSHong Zhang 11058a912c5SAmlan Barua #else 111a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11258a912c5SAmlan Barua #endif 113b2b77a04SHong Zhang break; 114b2b77a04SHong Zhang } 115f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 116b2b77a04SHong Zhang fftw->foutarray = y_array; 117b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 120b2b77a04SHong Zhang } else { /* use existing plan */ 121b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1227e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 123b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 1247e4bc134SDominic Meiser #else 1257e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 1267e4bc134SDominic Meiser #endif 127b2b77a04SHong Zhang } else { 128b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 129b2b77a04SHong Zhang } 130b2b77a04SHong Zhang } 131b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 132f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 133b2b77a04SHong Zhang PetscFunctionReturn(0); 134b2b77a04SHong Zhang } 135b2b77a04SHong Zhang 13697504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13797504071SAmlan Barua Input parameter: 1383564f093SHong Zhang A - the matrix 13997504071SAmlan Barua x - the vector on which BDFT will be performed 14097504071SAmlan Barua 14197504071SAmlan Barua Output parameter: 14297504071SAmlan Barua y - vector that stores result of BDFT 14397504071SAmlan Barua */ 14497504071SAmlan Barua 145b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 146b2b77a04SHong Zhang { 147b2b77a04SHong Zhang PetscErrorCode ierr; 148b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 149b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 150f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 151f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 152b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1531acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 154a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 155a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 156a1b6d50cSHong Zhang #else 157a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 158a1b6d50cSHong Zhang #endif 1591acd80e4SHong Zhang #endif 160b2b77a04SHong Zhang 161b2b77a04SHong Zhang PetscFunctionBegin; 162f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 165b2b77a04SHong Zhang switch (ndim) { 166b2b77a04SHong Zhang case 1: 16758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 168b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16958a912c5SAmlan Barua #else 170e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17158a912c5SAmlan Barua #endif 172b2b77a04SHong Zhang break; 173b2b77a04SHong Zhang case 2: 17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 175b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17658a912c5SAmlan Barua #else 177e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17858a912c5SAmlan Barua #endif 179b2b77a04SHong Zhang break; 180b2b77a04SHong Zhang case 3: 18158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 182b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18358a912c5SAmlan Barua #else 184e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18558a912c5SAmlan Barua #endif 186b2b77a04SHong Zhang break; 187b2b77a04SHong Zhang default: 18858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 189a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 190a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 191a1b6d50cSHong Zhang #else 1928c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 193a1b6d50cSHong Zhang #endif 19458a912c5SAmlan Barua #else 195a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19658a912c5SAmlan Barua #endif 197b2b77a04SHong Zhang break; 198b2b77a04SHong Zhang } 199f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 200b2b77a04SHong Zhang fftw->boutarray = y_array; 201b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 202b2b77a04SHong Zhang } else { /* use existing plan */ 203b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2047e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 205b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 2067e4bc134SDominic Meiser #else 2077e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 2087e4bc134SDominic Meiser #endif 209b2b77a04SHong Zhang } else { 210b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 211b2b77a04SHong Zhang } 212b2b77a04SHong Zhang } 213b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 214f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 215b2b77a04SHong Zhang PetscFunctionReturn(0); 216b2b77a04SHong Zhang } 217b2b77a04SHong Zhang 21897504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 21997504071SAmlan Barua Input parameter: 2203564f093SHong Zhang A - the matrix 22197504071SAmlan Barua x - the vector on which FDFT will be performed 22297504071SAmlan Barua 22397504071SAmlan Barua Output parameter: 22497504071SAmlan Barua y - vector that stores result of FDFT 22597504071SAmlan Barua */ 226b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 227b2b77a04SHong Zhang { 228b2b77a04SHong Zhang PetscErrorCode ierr; 229b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 230b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 231f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 232f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 233c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 234ce94432eSBarry Smith MPI_Comm comm; 235b2b77a04SHong Zhang 236b2b77a04SHong Zhang PetscFunctionBegin; 237ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 238f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 239b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 240b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 241b2b77a04SHong Zhang switch (ndim) { 242b2b77a04SHong Zhang case 1: 2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 244b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 245ae0a50aaSHong Zhang #else 2464f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2473c3a9c75SAmlan Barua #endif 248b2b77a04SHong Zhang break; 249b2b77a04SHong Zhang case 2: 25097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 251b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2523c3a9c75SAmlan Barua #else 2533c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2543c3a9c75SAmlan Barua #endif 255b2b77a04SHong Zhang break; 256b2b77a04SHong Zhang case 3: 2573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 258b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2593c3a9c75SAmlan Barua #else 26051d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2613c3a9c75SAmlan Barua #endif 262b2b77a04SHong Zhang break; 263b2b77a04SHong Zhang default: 2643c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 265c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2663c3a9c75SAmlan Barua #else 2673c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2683c3a9c75SAmlan Barua #endif 269b2b77a04SHong Zhang break; 270b2b77a04SHong Zhang } 271f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 272b2b77a04SHong Zhang fftw->foutarray = y_array; 273b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 274b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 275b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 276b2b77a04SHong Zhang } else { /* use existing plan */ 277b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 278b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 279b2b77a04SHong Zhang } else { 280b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 281b2b77a04SHong Zhang } 282b2b77a04SHong Zhang } 283b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 284f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 285b2b77a04SHong Zhang PetscFunctionReturn(0); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang 28897504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 28997504071SAmlan Barua Input parameter: 2903564f093SHong Zhang A - the matrix 29197504071SAmlan Barua x - the vector on which BDFT will be performed 29297504071SAmlan Barua 29397504071SAmlan Barua Output parameter: 29497504071SAmlan Barua y - vector that stores result of BDFT 29597504071SAmlan Barua */ 296b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 297b2b77a04SHong Zhang { 298b2b77a04SHong Zhang PetscErrorCode ierr; 299b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 300b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 301f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 302f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 303c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 304ce94432eSBarry Smith MPI_Comm comm; 305b2b77a04SHong Zhang 306b2b77a04SHong Zhang PetscFunctionBegin; 307ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 308f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 310b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 311b2b77a04SHong Zhang switch (ndim) { 312b2b77a04SHong Zhang case 1: 3133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 314b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 315ae0a50aaSHong Zhang #else 3164f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3173c3a9c75SAmlan Barua #endif 318b2b77a04SHong Zhang break; 319b2b77a04SHong Zhang case 2: 32097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 321b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3223c3a9c75SAmlan Barua #else 3233c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3243c3a9c75SAmlan Barua #endif 325b2b77a04SHong Zhang break; 326b2b77a04SHong Zhang case 3: 3273c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 328b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3293c3a9c75SAmlan Barua #else 3303c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3313c3a9c75SAmlan Barua #endif 332b2b77a04SHong Zhang break; 333b2b77a04SHong Zhang default: 3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 335c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3363c3a9c75SAmlan Barua #else 3373c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3383c3a9c75SAmlan Barua #endif 339b2b77a04SHong Zhang break; 340b2b77a04SHong Zhang } 341f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 342b2b77a04SHong Zhang fftw->boutarray = y_array; 343b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344b2b77a04SHong Zhang } else { /* use existing plan */ 345b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 346b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 347b2b77a04SHong Zhang } else { 348b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 349b2b77a04SHong Zhang } 350b2b77a04SHong Zhang } 351b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 352f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 353b2b77a04SHong Zhang PetscFunctionReturn(0); 354b2b77a04SHong Zhang } 355b2b77a04SHong Zhang 356b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 357b2b77a04SHong Zhang { 358b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 359b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360b2b77a04SHong Zhang PetscErrorCode ierr; 361b2b77a04SHong Zhang 362b2b77a04SHong Zhang PetscFunctionBegin; 363b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 364b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3658c1d5ab3SHong Zhang if (fftw->iodims) { 3668c1d5ab3SHong Zhang free(fftw->iodims); 3678c1d5ab3SHong Zhang } 368bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 369bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3706ccf0b3eSHong Zhang fftw_mpi_cleanup(); 371b2b77a04SHong Zhang PetscFunctionReturn(0); 372b2b77a04SHong Zhang } 373b2b77a04SHong Zhang 374c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 375b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 376b2b77a04SHong Zhang { 377b2b77a04SHong Zhang PetscErrorCode ierr; 378b2b77a04SHong Zhang PetscScalar *array; 379b2b77a04SHong Zhang 380b2b77a04SHong Zhang PetscFunctionBegin; 381b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 382b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 383b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 384b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 385b2b77a04SHong Zhang PetscFunctionReturn(0); 386b2b77a04SHong Zhang } 387b2b77a04SHong Zhang 3885b113e21Ss-sajid-ali 3895b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 3905b113e21Ss-sajid-ali { 3915b113e21Ss-sajid-ali PetscErrorCode ierr; 3925b113e21Ss-sajid-ali Mat A; 3935b113e21Ss-sajid-ali 3945b113e21Ss-sajid-ali PetscFunctionBegin; 3955b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 3965b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 3975b113e21Ss-sajid-ali PetscFunctionReturn(0); 3985b113e21Ss-sajid-ali } 3995b113e21Ss-sajid-ali 4005b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 4015b113e21Ss-sajid-ali { 4025b113e21Ss-sajid-ali PetscErrorCode ierr; 4035b113e21Ss-sajid-ali Mat A; 4045b113e21Ss-sajid-ali 4055b113e21Ss-sajid-ali PetscFunctionBegin; 4065b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 4075b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 4085b113e21Ss-sajid-ali PetscFunctionReturn(0); 4095b113e21Ss-sajid-ali } 4105b113e21Ss-sajid-ali 4115b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 4125b113e21Ss-sajid-ali { 4135b113e21Ss-sajid-ali PetscErrorCode ierr; 4145b113e21Ss-sajid-ali Mat A; 4155b113e21Ss-sajid-ali 4165b113e21Ss-sajid-ali PetscFunctionBegin; 4175b113e21Ss-sajid-ali ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 4185b113e21Ss-sajid-ali ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 4195b113e21Ss-sajid-ali PetscFunctionReturn(0); 4205b113e21Ss-sajid-ali } 4215b113e21Ss-sajid-ali 4225b113e21Ss-sajid-ali 4234be45526SHong Zhang /*@ 4242a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 4254f7415efSAmlan Barua parallel layout determined by FFTW 4264f7415efSAmlan Barua 4274f7415efSAmlan Barua Collective on Mat 4284f7415efSAmlan Barua 4294f7415efSAmlan Barua Input Parameter: 4303564f093SHong Zhang . A - the matrix 4314f7415efSAmlan Barua 4324f7415efSAmlan Barua Output Parameter: 433cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 434cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 435cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4364f7415efSAmlan Barua 4374f7415efSAmlan Barua Level: advanced 4383564f093SHong Zhang 43997504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 44097504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 44197504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 44297504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 44397504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 44497504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 445e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 446e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 447e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 448e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 449e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 450e0ec536eSAmlan Barua each processor and returns that. 4514f7415efSAmlan Barua 45275f45d78SBarry Smith .seealso: MatCreateFFT() 4534be45526SHong Zhang @*/ 4542a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4554be45526SHong Zhang { 4564be45526SHong Zhang PetscErrorCode ierr; 457b4c3921fSHong Zhang 4584be45526SHong Zhang PetscFunctionBegin; 459163d334eSBarry Smith ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4604be45526SHong Zhang PetscFunctionReturn(0); 4614be45526SHong Zhang } 4624be45526SHong Zhang 4632a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4644f7415efSAmlan Barua { 4654f7415efSAmlan Barua PetscErrorCode ierr; 4664f7415efSAmlan Barua PetscMPIInt size,rank; 467ce94432eSBarry Smith MPI_Comm comm; 4684f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4694f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4709496c9aeSAmlan Barua PetscInt N = fft->N; 4714f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4724f7415efSAmlan Barua 4734f7415efSAmlan Barua PetscFunctionBegin; 4744f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4754f7415efSAmlan Barua PetscValidType(A,1); 476ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4774f7415efSAmlan Barua 4784f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4794f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4804f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4814f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4824f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4834f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4844f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4854f7415efSAmlan Barua #else 4868ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4878ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4888ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4894f7415efSAmlan Barua #endif 49097504071SAmlan Barua } else { /* parallel cases */ 4914f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4929496c9aeSAmlan Barua ptrdiff_t local_n1; 4939496c9aeSAmlan Barua fftw_complex *data_fout; 4949496c9aeSAmlan Barua ptrdiff_t local_1_start; 4959496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4969496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4979496c9aeSAmlan Barua #else 4984f7415efSAmlan Barua double *data_finr,*data_boutr; 4996ccf0b3eSHong Zhang PetscInt n1,N1; 5009496c9aeSAmlan Barua ptrdiff_t temp; 5019496c9aeSAmlan Barua #endif 5029496c9aeSAmlan Barua 5034f7415efSAmlan Barua switch (ndim) { 5044f7415efSAmlan Barua case 1: 50557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5064f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 50757625b84SAmlan Barua #else 50857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 50957625b84SAmlan Barua if (fin) { 51057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5125b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5135b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 51457625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 51557625b84SAmlan Barua } 51657625b84SAmlan Barua if (fout) { 51757625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 518778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5195b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5205b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 52157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52257625b84SAmlan Barua } 52357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 52457625b84SAmlan Barua if (bout) { 52557625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5275b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5285b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 52957625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 53057625b84SAmlan Barua } 53157625b84SAmlan Barua break; 53257625b84SAmlan Barua #endif 5334f7415efSAmlan Barua case 2: 53497504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5354f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5364f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5374f7415efSAmlan Barua if (fin) { 5384f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 539778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5405b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5415b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5424f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5434f7415efSAmlan Barua } 54457625b84SAmlan Barua if (fout) { 54557625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 546778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5475b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5485b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 54957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 55057625b84SAmlan Barua } 5515b113e21Ss-sajid-ali if (bout) { 5525b113e21Ss-sajid-ali data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 5535b113e21Ss-sajid-ali ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 5545b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5555b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5565b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5575b113e21Ss-sajid-ali } 5584f7415efSAmlan Barua #else 5594f7415efSAmlan Barua /* Get local size */ 5604f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5614f7415efSAmlan Barua if (fin) { 5624f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 563778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 5645b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5655b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5664f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5674f7415efSAmlan Barua } 5684f7415efSAmlan Barua if (fout) { 5694f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5715b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5725b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 5734f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5744f7415efSAmlan Barua } 5754f7415efSAmlan Barua if (bout) { 5764f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 5785b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5795b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 5804f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5814f7415efSAmlan Barua } 5824f7415efSAmlan Barua #endif 5834f7415efSAmlan Barua break; 5844f7415efSAmlan Barua case 3: 5854f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5864f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5874f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5884f7415efSAmlan Barua if (fin) { 5894f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 590778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 5915b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5925b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 5934f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5944f7415efSAmlan Barua } 5955b113e21Ss-sajid-ali if (fout) { 5965b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 5975b113e21Ss-sajid-ali ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 5985b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 5995b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6005b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6015b113e21Ss-sajid-ali } 6024f7415efSAmlan Barua if (bout) { 6034f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 604778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 6055b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6065b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6074f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6084f7415efSAmlan Barua } 6094f7415efSAmlan Barua #else 6100c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 6110c9b18e4SAmlan Barua if (fin) { 6120c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 613778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6145b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6155b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6160c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6170c9b18e4SAmlan Barua } 6180c9b18e4SAmlan Barua if (fout) { 6190c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 620778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6215b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6225b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6230c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6240c9b18e4SAmlan Barua } 6250c9b18e4SAmlan Barua if (bout) { 6260c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 627778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6285b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6295b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6300c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6310c9b18e4SAmlan Barua } 6324f7415efSAmlan Barua #endif 6334f7415efSAmlan Barua break; 6344f7415efSAmlan Barua default: 6354f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6364f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 63726fbe8dcSKarl Rupp 6384f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 63926fbe8dcSKarl Rupp 6404f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6414f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 64226fbe8dcSKarl Rupp 6434f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 6444f7415efSAmlan Barua 6454f7415efSAmlan Barua if (fin) { 6464f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 647778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 6485b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6495b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6504f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6514f7415efSAmlan Barua } 6525b113e21Ss-sajid-ali if (fout) { 6535b113e21Ss-sajid-ali data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 6545b113e21Ss-sajid-ali ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6555b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6565b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6575b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6585b113e21Ss-sajid-ali } 6594f7415efSAmlan Barua if (bout) { 6604f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 661778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 6625b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6635b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6649496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6654f7415efSAmlan Barua } 6664f7415efSAmlan Barua #else 6670c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6680c9b18e4SAmlan Barua if (fin) { 6690c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 670778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 6715b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6725b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 6730c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6740c9b18e4SAmlan Barua } 6750c9b18e4SAmlan Barua if (fout) { 6760c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 677778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 6785b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6795b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 6800c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6810c9b18e4SAmlan Barua } 6820c9b18e4SAmlan Barua if (bout) { 6830c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 684778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 6855b113e21Ss-sajid-ali ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 6865b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 6870c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6880c9b18e4SAmlan Barua } 6894f7415efSAmlan Barua #endif 6904f7415efSAmlan Barua break; 6914f7415efSAmlan Barua } 692f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 693f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 694f9d91177SJunchao Zhang memory leaks. We void these pointers here to avoid acident uses. 695f9d91177SJunchao Zhang */ 696f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL; 697f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL; 698f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL; 6994f7415efSAmlan Barua } 7004f7415efSAmlan Barua PetscFunctionReturn(0); 7014f7415efSAmlan Barua } 7024f7415efSAmlan Barua 7033564f093SHong Zhang /*@ 7043564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 70554efbe56SHong Zhang 7063564f093SHong Zhang Collective on Mat 7073564f093SHong Zhang 7083564f093SHong Zhang Input Parameters: 7093564f093SHong Zhang + A - FFTW matrix 7103564f093SHong Zhang - x - the PETSc vector 7113564f093SHong Zhang 7123564f093SHong Zhang Output Parameters: 7133564f093SHong Zhang . y - the FFTW vector 7143564f093SHong Zhang 715b2b77a04SHong Zhang Options Database Keys: 7163564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 717b2b77a04SHong Zhang 718b2b77a04SHong Zhang Level: intermediate 7193564f093SHong Zhang 72097504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 72197504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 72297504071SAmlan Barua zeros. This routine does that job by scattering operation. 72397504071SAmlan Barua 7243564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 7253564f093SHong Zhang @*/ 7263564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 7273564f093SHong Zhang { 7283564f093SHong Zhang PetscErrorCode ierr; 729b2b77a04SHong Zhang 7303564f093SHong Zhang PetscFunctionBegin; 731163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 7323564f093SHong Zhang PetscFunctionReturn(0); 7333564f093SHong Zhang } 7343c3a9c75SAmlan Barua 73574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 7363c3a9c75SAmlan Barua { 7373c3a9c75SAmlan Barua PetscErrorCode ierr; 738ce94432eSBarry Smith MPI_Comm comm; 7393c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 7403c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 7419496c9aeSAmlan Barua PetscInt N =fft->N; 742b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 7439496c9aeSAmlan Barua PetscInt low; 7447a21806cSHong Zhang PetscMPIInt rank,size; 7457a21806cSHong Zhang PetscInt vsize,vsize1; 7467a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 7479496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 7483c3a9c75SAmlan Barua VecScatter vecscat; 7493c3a9c75SAmlan Barua IS list1,list2; 7509496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7519496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 7529496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 753a31b9140SHong Zhang PetscInt NM; 7549496c9aeSAmlan Barua ptrdiff_t temp; 7559496c9aeSAmlan Barua #endif 7563c3a9c75SAmlan Barua 7573564f093SHong Zhang PetscFunctionBegin; 758ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 759f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 760f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7610298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7623c3a9c75SAmlan Barua 7633564f093SHong Zhang if (size==1) { 7648ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7658ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7669496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7679448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7686971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7696971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7706971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 771b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7723564f093SHong Zhang } else { 7733c3a9c75SAmlan Barua switch (ndim) { 7743c3a9c75SAmlan Barua case 1: 77564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7767a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 77726fbe8dcSKarl Rupp 7784f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7794f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 7809448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 78164657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78264657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 78364657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 78464657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 78564657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 78664657d84SAmlan Barua #else 787e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 78864657d84SAmlan Barua #endif 7893c3a9c75SAmlan Barua break; 7903c3a9c75SAmlan Barua case 2: 791bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7927a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 79326fbe8dcSKarl Rupp 7944f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7954f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 7969448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 797bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 800bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 801bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 802bd59e6c6SAmlan Barua #else 803c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80426fbe8dcSKarl Rupp 805854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 806854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 8073c3a9c75SAmlan Barua 8083564f093SHong Zhang if (dim[1]%2==0) { 8093c3a9c75SAmlan Barua NM = dim[1]+2; 8103564f093SHong Zhang } else { 8113c3a9c75SAmlan Barua NM = dim[1]+1; 8123564f093SHong Zhang } 8133c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 8143c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 8153c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 8163c3a9c75SAmlan Barua tempindx1 = i*NM + j; 81726fbe8dcSKarl Rupp 8185b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 8193c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 8203c3a9c75SAmlan Barua } 8213c3a9c75SAmlan Barua } 8223c3a9c75SAmlan Barua 8233c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8243c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8253c3a9c75SAmlan Barua 8269448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 827f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 830b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 831b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 832b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 833b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 834bd59e6c6SAmlan Barua #endif 8359496c9aeSAmlan Barua break; 8363c3a9c75SAmlan Barua 8373c3a9c75SAmlan Barua case 3: 838bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8397a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 84026fbe8dcSKarl Rupp 8414f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 8424f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 8439448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 844bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 845bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 847bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 848bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 849bd59e6c6SAmlan Barua #else 850*c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 851f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 8527a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 85326fbe8dcSKarl Rupp 854854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 855854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 85651d1eed7SAmlan Barua 857db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 858db4deed7SKarl Rupp else NM = dim[2]+1; 85951d1eed7SAmlan Barua 86051d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 86151d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 86251d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 86351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 86451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 86526fbe8dcSKarl Rupp 86651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 86751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 86851d1eed7SAmlan Barua } 86951d1eed7SAmlan Barua } 87051d1eed7SAmlan Barua } 87151d1eed7SAmlan Barua 87251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 87351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8749448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 87551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 87751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 878b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 879b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 880b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 881b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 882bd59e6c6SAmlan Barua #endif 8839496c9aeSAmlan Barua break; 8843c3a9c75SAmlan Barua 8853c3a9c75SAmlan Barua default: 886bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8877a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 88826fbe8dcSKarl Rupp 8894f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8904f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 8919448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 892bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 895bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 896bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 897bd59e6c6SAmlan Barua #else 898*c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 899f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 900e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 90126fbe8dcSKarl Rupp 902e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 90326fbe8dcSKarl Rupp 9047a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 90526fbe8dcSKarl Rupp 906e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 907e5d7f247SAmlan Barua 908e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 909e5d7f247SAmlan Barua 910854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 911854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 912e5d7f247SAmlan Barua 913db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 914db4deed7SKarl Rupp else NM = 1; 915e5d7f247SAmlan Barua 9166971673cSAmlan Barua j = low; 9173564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 9186971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 9196971673cSAmlan Barua indx2[i] = j; 92026fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 9216971673cSAmlan Barua j++; 9226971673cSAmlan Barua } 9236971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9246971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9259448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 9266971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9276971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9286971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 929b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 930b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 931b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 932b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 933bd59e6c6SAmlan Barua #endif 9349496c9aeSAmlan Barua break; 9353c3a9c75SAmlan Barua } 936e81bb599SAmlan Barua } 9373564f093SHong Zhang PetscFunctionReturn(0); 9383c3a9c75SAmlan Barua } 9393c3a9c75SAmlan Barua 9403564f093SHong Zhang /*@ 9413564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 9423564f093SHong Zhang 9433564f093SHong Zhang Collective on Mat 9443564f093SHong Zhang 9453564f093SHong Zhang Input Parameters: 9463564f093SHong Zhang + A - FFTW matrix 9473564f093SHong Zhang - x - FFTW vector 9483564f093SHong Zhang 9493564f093SHong Zhang Output Parameters: 9503564f093SHong Zhang . y - PETSc vector 9513564f093SHong Zhang 9523564f093SHong Zhang Level: intermediate 9533564f093SHong Zhang 9543564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9553564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9563564f093SHong Zhang 9573564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9583564f093SHong Zhang @*/ 95974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9603c3a9c75SAmlan Barua { 9613c3a9c75SAmlan Barua PetscErrorCode ierr; 9625fd66863SKarl Rupp 9633c3a9c75SAmlan Barua PetscFunctionBegin; 964163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9653c3a9c75SAmlan Barua PetscFunctionReturn(0); 9663c3a9c75SAmlan Barua } 96754efbe56SHong Zhang 96874a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9695b3e41ffSAmlan Barua { 9705b3e41ffSAmlan Barua PetscErrorCode ierr; 971ce94432eSBarry Smith MPI_Comm comm; 9725b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9735b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9749496c9aeSAmlan Barua PetscInt N = fft->N; 975b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9769496c9aeSAmlan Barua PetscInt low; 9777a21806cSHong Zhang PetscMPIInt rank,size; 9787a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9799496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9805b3e41ffSAmlan Barua VecScatter vecscat; 9815b3e41ffSAmlan Barua IS list1,list2; 9829496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9839496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9849496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 985a31b9140SHong Zhang PetscInt NM; 9869496c9aeSAmlan Barua ptrdiff_t temp; 9879496c9aeSAmlan Barua #endif 9889496c9aeSAmlan Barua 9893564f093SHong Zhang PetscFunctionBegin; 990ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9915b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9925b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9930298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9945b3e41ffSAmlan Barua 995e81bb599SAmlan Barua if (size==1) { 996b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9979448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9986971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9996971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10006971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1001b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1002e81bb599SAmlan Barua 10033564f093SHong Zhang } else { 1004e81bb599SAmlan Barua switch (ndim) { 1005e81bb599SAmlan Barua case 1: 100664657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10077a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 100826fbe8dcSKarl Rupp 10094f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 10104f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 10119448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 101264657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101364657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 101464657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 101564657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 101664657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 101764657d84SAmlan Barua #else 10186a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 101964657d84SAmlan Barua #endif 10205b3e41ffSAmlan Barua break; 10215b3e41ffSAmlan Barua case 2: 1022bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10237a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 102426fbe8dcSKarl Rupp 10254f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 10264f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 10279448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1031bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1032bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1033bd59e6c6SAmlan Barua #else 10347a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103526fbe8dcSKarl Rupp 1036854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1037854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 10385b3e41ffSAmlan Barua 1039db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 1040db4deed7SKarl Rupp else NM = dim[1]+1; 10415b3e41ffSAmlan Barua 10425b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 10435b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 10445b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10455b3e41ffSAmlan Barua tempindx1 = i*NM + j; 104626fbe8dcSKarl Rupp 10475b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10485b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10495b3e41ffSAmlan Barua } 10505b3e41ffSAmlan Barua } 10515b3e41ffSAmlan Barua 10525b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10535b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10545b3e41ffSAmlan Barua 10559448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10565b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10575b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10585b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1062b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1063bd59e6c6SAmlan Barua #endif 10649496c9aeSAmlan Barua break; 10655b3e41ffSAmlan Barua case 3: 1066bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10677a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 106826fbe8dcSKarl Rupp 10694f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10704f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 10719448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1072bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077bd59e6c6SAmlan Barua #else 10787a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 107926fbe8dcSKarl Rupp 1080854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1081854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 108251d1eed7SAmlan Barua 1083db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1084db4deed7SKarl Rupp else NM = dim[2]+1; 108551d1eed7SAmlan Barua 108651d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 108751d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 108851d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 108951d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 109051d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 109126fbe8dcSKarl Rupp 109251d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 109351d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 109451d1eed7SAmlan Barua } 109551d1eed7SAmlan Barua } 109651d1eed7SAmlan Barua } 109751d1eed7SAmlan Barua 109851d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 109951d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 110051d1eed7SAmlan Barua 11019448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 110251d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 110351d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 110451d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1105b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1106b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1107b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1108b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1109bd59e6c6SAmlan Barua #endif 11109496c9aeSAmlan Barua break; 11115b3e41ffSAmlan Barua default: 1112bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11137a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 111426fbe8dcSKarl Rupp 11154f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 11164f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 11179448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1118bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1121bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1122bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1123bd59e6c6SAmlan Barua #else 1124ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 112526fbe8dcSKarl Rupp 1126ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 112726fbe8dcSKarl Rupp 1128c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 112926fbe8dcSKarl Rupp 1130ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1131ba6e06d0SAmlan Barua 1132ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1133ba6e06d0SAmlan Barua 1134854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1135854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1136ba6e06d0SAmlan Barua 1137db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1138db4deed7SKarl Rupp else NM = 1; 1139ba6e06d0SAmlan Barua 1140ba6e06d0SAmlan Barua j = low; 11413564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1142ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1143ba6e06d0SAmlan Barua indx2[i] = j; 11443564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1145ba6e06d0SAmlan Barua j++; 1146ba6e06d0SAmlan Barua } 1147ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1148ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1149ba6e06d0SAmlan Barua 11509448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1151ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1153ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1154b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1155b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1156b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1157b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1158bd59e6c6SAmlan Barua #endif 11599496c9aeSAmlan Barua break; 11605b3e41ffSAmlan Barua } 1161e81bb599SAmlan Barua } 11623564f093SHong Zhang PetscFunctionReturn(0); 11635b3e41ffSAmlan Barua } 11645b3e41ffSAmlan Barua 11653c3a9c75SAmlan Barua /* 11663564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11673564f093SHong Zhang 11683c3a9c75SAmlan Barua Options Database Keys: 11693c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11703c3a9c75SAmlan Barua 11713c3a9c75SAmlan Barua Level: intermediate 11723c3a9c75SAmlan Barua 11733c3a9c75SAmlan Barua */ 11748cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1175b2b77a04SHong Zhang { 1176b2b77a04SHong Zhang PetscErrorCode ierr; 1177ce94432eSBarry Smith MPI_Comm comm; 1178b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1179b2b77a04SHong Zhang Mat_FFTW *fftw; 1180b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11815274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11825274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1183b2b77a04SHong Zhang PetscBool flg; 11844f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 118511902ff2SHong Zhang PetscMPIInt size,rank; 11869496c9aeSAmlan Barua ptrdiff_t *pdim; 11879496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11899496c9aeSAmlan Barua ptrdiff_t temp; 11908ca4c034SAmlan Barua PetscInt N1,tot_dim; 11914f48bc67SAmlan Barua #else 11924f48bc67SAmlan Barua PetscInt n1; 11939496c9aeSAmlan Barua #endif 11949496c9aeSAmlan Barua 1195b2b77a04SHong Zhang PetscFunctionBegin; 1196ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1197b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 119811902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1199c92418ccSAmlan Barua 12001878d478SAmlan Barua fftw_mpi_init(); 120111902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 120211902ff2SHong Zhang pdim[0] = dim[0]; 12038ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12048ca4c034SAmlan Barua tot_dim = 2*dim[0]; 12058ca4c034SAmlan Barua #endif 12063564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 12076882372aSHong Zhang partial_dim *= dim[ctr]; 120811902ff2SHong Zhang pdim[ctr] = dim[ctr]; 12098ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1210db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1211db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 12128ca4c034SAmlan Barua #endif 12136882372aSHong Zhang } 12143c3a9c75SAmlan Barua 1215b2b77a04SHong Zhang if (size == 1) { 1216e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1217b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1218b2b77a04SHong Zhang n = N; 1219e81bb599SAmlan Barua #else 1220e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1221e81bb599SAmlan Barua n = tot_dim; 1222e81bb599SAmlan Barua #endif 1223e81bb599SAmlan Barua 1224b2b77a04SHong Zhang } else { 12257a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1226b2b77a04SHong Zhang switch (ndim) { 1227b2b77a04SHong Zhang case 1: 12283c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 12293c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1230e5d7f247SAmlan Barua #else 12317a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 123226fbe8dcSKarl Rupp 12336882372aSHong Zhang n = (PetscInt)local_n0; 12349496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 12359496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1236e5d7f247SAmlan Barua #endif 1237b2b77a04SHong Zhang break; 1238b2b77a04SHong Zhang case 2: 12395b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12407a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1241b2b77a04SHong Zhang /* 1242b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 12430ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1244b2b77a04SHong Zhang */ 1245b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1246b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12475b3e41ffSAmlan Barua #else 1248c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 124926fbe8dcSKarl Rupp 12505b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1251c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12525b3e41ffSAmlan Barua #endif 1253b2b77a04SHong Zhang break; 1254b2b77a04SHong Zhang case 3: 125551d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12567a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 125726fbe8dcSKarl Rupp 12586882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12596882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 126051d1eed7SAmlan Barua #else 1261c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 126226fbe8dcSKarl Rupp 126351d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1264c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 126551d1eed7SAmlan Barua #endif 1266b2b77a04SHong Zhang break; 1267b2b77a04SHong Zhang default: 1268b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12697a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 127026fbe8dcSKarl Rupp 12716882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12726882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1273b3a17365SAmlan Barua #else 1274b3a17365SAmlan Barua temp = pdim[ndim-1]; 127526fbe8dcSKarl Rupp 1276b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 127726fbe8dcSKarl Rupp 1278c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 127926fbe8dcSKarl Rupp 1280b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1281b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 128226fbe8dcSKarl Rupp 1283b3a17365SAmlan Barua pdim[ndim-1] = temp; 128426fbe8dcSKarl Rupp 1285c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1286b3a17365SAmlan Barua #endif 1287b2b77a04SHong Zhang break; 1288b2b77a04SHong Zhang } 1289b2b77a04SHong Zhang } 12902277172eSMark Adams free(pdim); 1291b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1292b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1293b2b77a04SHong Zhang fft->data = (void*)fftw; 1294b2b77a04SHong Zhang 1295b2b77a04SHong Zhang fft->n = n; 12960c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1297e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 129826fbe8dcSKarl Rupp 12995e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 13008c1d5ab3SHong Zhang if (size == 1) { 1301a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1302a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1303a1b6d50cSHong Zhang #else 13048c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1305a1b6d50cSHong Zhang #endif 13068c1d5ab3SHong Zhang } 130726fbe8dcSKarl Rupp 1308b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1309c92418ccSAmlan Barua 1310b2b77a04SHong Zhang fftw->p_forward = 0; 1311b2b77a04SHong Zhang fftw->p_backward = 0; 1312b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1313b2b77a04SHong Zhang 1314b2b77a04SHong Zhang if (size == 1) { 1315b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1316b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1317b2b77a04SHong Zhang } else { 1318b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1319b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1320b2b77a04SHong Zhang } 1321b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1322b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 13234a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 132426fbe8dcSKarl Rupp 13252a7a6963SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1326bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1327bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1328b2b77a04SHong Zhang 1329b2b77a04SHong Zhang /* get runtime options */ 1330ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 13315274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 13325274ed1bSHong Zhang if (flg) { 13335274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 13345274ed1bSHong Zhang } 13354a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1336b2b77a04SHong Zhang PetscFunctionReturn(0); 1337b2b77a04SHong Zhang } 13383c3a9c75SAmlan Barua 13393c3a9c75SAmlan Barua 13403c3a9c75SAmlan Barua 13413c3a9c75SAmlan Barua 1342