1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32b2b77a04SHong Zhang 3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3497504071SAmlan Barua Input parameter: 353564f093SHong Zhang A - the matrix 3697504071SAmlan Barua x - the vector on which FDFT will be performed 3797504071SAmlan Barua 3897504071SAmlan Barua Output parameter: 3997504071SAmlan Barua y - vector that stores result of FDFT 4097504071SAmlan Barua */ 41b2b77a04SHong Zhang #undef __FUNCT__ 42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44b2b77a04SHong Zhang { 45b2b77a04SHong Zhang PetscErrorCode ierr; 46b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 47b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48*f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 49*f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 51a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 52a1b6d50cSHong Zhang fftw_iodim64 *iodims; 53a1b6d50cSHong Zhang #else 548c1d5ab3SHong Zhang fftw_iodim *iodims; 55a1b6d50cSHong Zhang #endif 561acd80e4SHong Zhang PetscInt i; 571acd80e4SHong Zhang #endif 581acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 59b2b77a04SHong Zhang 60b2b77a04SHong Zhang PetscFunctionBegin; 61*f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 62b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 63b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 64b2b77a04SHong Zhang switch (ndim) { 65b2b77a04SHong Zhang case 1: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang case 2: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang case 3: 8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 81b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8258a912c5SAmlan Barua #else 8358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8458a912c5SAmlan Barua #endif 85b2b77a04SHong Zhang break; 86b2b77a04SHong Zhang default: 8758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 88a1b6d50cSHong Zhang iodims = fftw->iodims; 89a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 908c1d5ab3SHong Zhang if (ndim) { 91a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 928c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 938c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 94a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 958c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 968c1d5ab3SHong Zhang } 978c1d5ab3SHong Zhang } 98a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 99a1b6d50cSHong Zhang #else 100a1b6d50cSHong Zhang if (ndim) { 101a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 102a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 103a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 104a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 105a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 106a1b6d50cSHong Zhang } 107a1b6d50cSHong Zhang } 108a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 109a1b6d50cSHong Zhang #endif 110a1b6d50cSHong Zhang 11158a912c5SAmlan Barua #else 112a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11358a912c5SAmlan Barua #endif 114b2b77a04SHong Zhang break; 115b2b77a04SHong Zhang } 116*f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 117b2b77a04SHong Zhang fftw->foutarray = y_array; 118b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 119b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 120b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 121b2b77a04SHong Zhang } else { /* use existing plan */ 122b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 123b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 124b2b77a04SHong Zhang } else { 125b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 126b2b77a04SHong Zhang } 127b2b77a04SHong Zhang } 128b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 129*f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 130b2b77a04SHong Zhang PetscFunctionReturn(0); 131b2b77a04SHong Zhang } 132b2b77a04SHong Zhang 13397504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13497504071SAmlan Barua Input parameter: 1353564f093SHong Zhang A - the matrix 13697504071SAmlan Barua x - the vector on which BDFT will be performed 13797504071SAmlan Barua 13897504071SAmlan Barua Output parameter: 13997504071SAmlan Barua y - vector that stores result of BDFT 14097504071SAmlan Barua */ 14197504071SAmlan Barua 142b2b77a04SHong Zhang #undef __FUNCT__ 143b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 144b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 145b2b77a04SHong Zhang { 146b2b77a04SHong Zhang PetscErrorCode ierr; 147b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 148b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 149*f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 150*f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 151b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1521acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 153a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 154a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 155a1b6d50cSHong Zhang #else 156a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 157a1b6d50cSHong Zhang #endif 1581acd80e4SHong Zhang #endif 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161*f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim) { 165b2b77a04SHong Zhang case 1: 16658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16858a912c5SAmlan Barua #else 169e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17058a912c5SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 17358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17558a912c5SAmlan Barua #else 176e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17758a912c5SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 18058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18258a912c5SAmlan Barua #else 183e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18458a912c5SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 18758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 189a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190a1b6d50cSHong Zhang #else 1918c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 192a1b6d50cSHong Zhang #endif 19358a912c5SAmlan Barua #else 194a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19558a912c5SAmlan Barua #endif 196b2b77a04SHong Zhang break; 197b2b77a04SHong Zhang } 198*f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 199b2b77a04SHong Zhang fftw->boutarray = y_array; 200b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 201b2b77a04SHong Zhang } else { /* use existing plan */ 202b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 203b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 204b2b77a04SHong Zhang } else { 205b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 206b2b77a04SHong Zhang } 207b2b77a04SHong Zhang } 208b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 209*f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 210b2b77a04SHong Zhang PetscFunctionReturn(0); 211b2b77a04SHong Zhang } 212b2b77a04SHong Zhang 21397504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 21497504071SAmlan Barua Input parameter: 2153564f093SHong Zhang A - the matrix 21697504071SAmlan Barua x - the vector on which FDFT will be performed 21797504071SAmlan Barua 21897504071SAmlan Barua Output parameter: 21997504071SAmlan Barua y - vector that stores result of FDFT 22097504071SAmlan Barua */ 221b2b77a04SHong Zhang #undef __FUNCT__ 222b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 223b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 224b2b77a04SHong Zhang { 225b2b77a04SHong Zhang PetscErrorCode ierr; 226b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 227b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 228*f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 229*f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 230c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 231ce94432eSBarry Smith MPI_Comm comm; 232b2b77a04SHong Zhang 233b2b77a04SHong Zhang PetscFunctionBegin; 234ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 235*f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 236b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 237b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 238b2b77a04SHong Zhang switch (ndim) { 239b2b77a04SHong Zhang case 1: 2403c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 241b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 242ae0a50aaSHong Zhang #else 2434f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2443c3a9c75SAmlan Barua #endif 245b2b77a04SHong Zhang break; 246b2b77a04SHong Zhang case 2: 24797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 248b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2493c3a9c75SAmlan Barua #else 2503c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2513c3a9c75SAmlan Barua #endif 252b2b77a04SHong Zhang break; 253b2b77a04SHong Zhang case 3: 2543c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 255b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2563c3a9c75SAmlan Barua #else 25751d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2583c3a9c75SAmlan Barua #endif 259b2b77a04SHong Zhang break; 260b2b77a04SHong Zhang default: 2613c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 262c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2633c3a9c75SAmlan Barua #else 2643c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2653c3a9c75SAmlan Barua #endif 266b2b77a04SHong Zhang break; 267b2b77a04SHong Zhang } 268*f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 269b2b77a04SHong Zhang fftw->foutarray = y_array; 270b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 271b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 272b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 273b2b77a04SHong Zhang } else { /* use existing plan */ 274b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 275b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 276b2b77a04SHong Zhang } else { 277b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 278b2b77a04SHong Zhang } 279b2b77a04SHong Zhang } 280b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 281*f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 282b2b77a04SHong Zhang PetscFunctionReturn(0); 283b2b77a04SHong Zhang } 284b2b77a04SHong Zhang 28597504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 28697504071SAmlan Barua Input parameter: 2873564f093SHong Zhang A - the matrix 28897504071SAmlan Barua x - the vector on which BDFT will be performed 28997504071SAmlan Barua 29097504071SAmlan Barua Output parameter: 29197504071SAmlan Barua y - vector that stores result of BDFT 29297504071SAmlan Barua */ 293b2b77a04SHong Zhang #undef __FUNCT__ 294b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 295b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 296b2b77a04SHong Zhang { 297b2b77a04SHong Zhang PetscErrorCode ierr; 298b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 299b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 300*f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 301*f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 302c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 303ce94432eSBarry Smith MPI_Comm comm; 304b2b77a04SHong Zhang 305b2b77a04SHong Zhang PetscFunctionBegin; 306ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 307*f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 310b2b77a04SHong Zhang switch (ndim) { 311b2b77a04SHong Zhang case 1: 3123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 313b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 314ae0a50aaSHong Zhang #else 3154f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3163c3a9c75SAmlan Barua #endif 317b2b77a04SHong Zhang break; 318b2b77a04SHong Zhang case 2: 31997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 320b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3213c3a9c75SAmlan Barua #else 3223c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3233c3a9c75SAmlan Barua #endif 324b2b77a04SHong Zhang break; 325b2b77a04SHong Zhang case 3: 3263c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 327b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3283c3a9c75SAmlan Barua #else 3293c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3303c3a9c75SAmlan Barua #endif 331b2b77a04SHong Zhang break; 332b2b77a04SHong Zhang default: 3333c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 334c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3353c3a9c75SAmlan Barua #else 3363c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3373c3a9c75SAmlan Barua #endif 338b2b77a04SHong Zhang break; 339b2b77a04SHong Zhang } 340*f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 341b2b77a04SHong Zhang fftw->boutarray = y_array; 342b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 343b2b77a04SHong Zhang } else { /* use existing plan */ 344b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 345b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 346b2b77a04SHong Zhang } else { 347b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 348b2b77a04SHong Zhang } 349b2b77a04SHong Zhang } 350b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 351*f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 352b2b77a04SHong Zhang PetscFunctionReturn(0); 353b2b77a04SHong Zhang } 354b2b77a04SHong Zhang 355b2b77a04SHong Zhang #undef __FUNCT__ 356b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 357b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 358b2b77a04SHong Zhang { 359b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 360b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 361b2b77a04SHong Zhang PetscErrorCode ierr; 362b2b77a04SHong Zhang 363b2b77a04SHong Zhang PetscFunctionBegin; 364b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 365b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3668c1d5ab3SHong Zhang if (fftw->iodims) { 3678c1d5ab3SHong Zhang free(fftw->iodims); 3688c1d5ab3SHong Zhang } 369bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 370bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3716ccf0b3eSHong Zhang fftw_mpi_cleanup(); 372b2b77a04SHong Zhang PetscFunctionReturn(0); 373b2b77a04SHong Zhang } 374b2b77a04SHong Zhang 375c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 376b2b77a04SHong Zhang #undef __FUNCT__ 377b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 378b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 379b2b77a04SHong Zhang { 380b2b77a04SHong Zhang PetscErrorCode ierr; 381b2b77a04SHong Zhang PetscScalar *array; 382b2b77a04SHong Zhang 383b2b77a04SHong Zhang PetscFunctionBegin; 384b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 385b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 386b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 387b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 388b2b77a04SHong Zhang PetscFunctionReturn(0); 389b2b77a04SHong Zhang } 390b2b77a04SHong Zhang 3914f7415efSAmlan Barua #undef __FUNCT__ 3922a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW" 3934be45526SHong Zhang /*@ 3942a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3954f7415efSAmlan Barua parallel layout determined by FFTW 3964f7415efSAmlan Barua 3974f7415efSAmlan Barua Collective on Mat 3984f7415efSAmlan Barua 3994f7415efSAmlan Barua Input Parameter: 4003564f093SHong Zhang . A - the matrix 4014f7415efSAmlan Barua 4024f7415efSAmlan Barua Output Parameter: 403cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 404cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 405cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4064f7415efSAmlan Barua 4074f7415efSAmlan Barua Level: advanced 4083564f093SHong Zhang 40997504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 41097504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 41197504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 41297504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 41397504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 41497504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 415e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 416e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 417e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 418e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 419e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 420e0ec536eSAmlan Barua each processor and returns that. 4214f7415efSAmlan Barua 4224f7415efSAmlan Barua .seealso: MatCreateFFTW() 4234be45526SHong Zhang @*/ 4242a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4254be45526SHong Zhang { 4264be45526SHong Zhang PetscErrorCode ierr; 427b4c3921fSHong Zhang 4284be45526SHong Zhang PetscFunctionBegin; 4292a7a6963SBarry Smith ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4304be45526SHong Zhang PetscFunctionReturn(0); 4314be45526SHong Zhang } 4324be45526SHong Zhang 4334be45526SHong Zhang #undef __FUNCT__ 4342a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW_FFTW" 4352a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4364f7415efSAmlan Barua { 4374f7415efSAmlan Barua PetscErrorCode ierr; 4384f7415efSAmlan Barua PetscMPIInt size,rank; 439ce94432eSBarry Smith MPI_Comm comm; 4404f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4414f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4429496c9aeSAmlan Barua PetscInt N = fft->N; 4434f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4444f7415efSAmlan Barua 4454f7415efSAmlan Barua PetscFunctionBegin; 4464f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4474f7415efSAmlan Barua PetscValidType(A,1); 448ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4494f7415efSAmlan Barua 4504f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4514f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4524f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4534f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4544f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4554f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4564f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4574f7415efSAmlan Barua #else 4588ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4598ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4608ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4614f7415efSAmlan Barua #endif 46297504071SAmlan Barua } else { /* parallel cases */ 4634f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4649496c9aeSAmlan Barua ptrdiff_t local_n1; 4659496c9aeSAmlan Barua fftw_complex *data_fout; 4669496c9aeSAmlan Barua ptrdiff_t local_1_start; 4679496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4689496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4699496c9aeSAmlan Barua #else 4704f7415efSAmlan Barua double *data_finr,*data_boutr; 4716ccf0b3eSHong Zhang PetscInt n1,N1; 4729496c9aeSAmlan Barua ptrdiff_t temp; 4739496c9aeSAmlan Barua #endif 4749496c9aeSAmlan Barua 4754f7415efSAmlan Barua switch (ndim) { 4764f7415efSAmlan Barua case 1: 47757625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4784f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 47957625b84SAmlan Barua #else 48057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 48157625b84SAmlan Barua if (fin) { 48257625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 48426fbe8dcSKarl Rupp 48557625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 48657625b84SAmlan Barua } 48757625b84SAmlan Barua if (fout) { 48857625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 489778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 49026fbe8dcSKarl Rupp 49157625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 49257625b84SAmlan Barua } 49357625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 49457625b84SAmlan Barua if (bout) { 49557625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 496778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 49726fbe8dcSKarl Rupp 49857625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 49957625b84SAmlan Barua } 50057625b84SAmlan Barua break; 50157625b84SAmlan Barua #endif 5024f7415efSAmlan Barua case 2: 50397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5044f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5054f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5064f7415efSAmlan Barua if (fin) { 5074f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 508778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50926fbe8dcSKarl Rupp 5104f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5114f7415efSAmlan Barua } 5124f7415efSAmlan Barua if (bout) { 5134f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 514778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 51526fbe8dcSKarl Rupp 5164f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5174f7415efSAmlan Barua } 51857625b84SAmlan Barua if (fout) { 51957625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52126fbe8dcSKarl Rupp 52257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 52357625b84SAmlan Barua } 5244f7415efSAmlan Barua #else 5254f7415efSAmlan Barua /* Get local size */ 5264f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5274f7415efSAmlan Barua if (fin) { 5284f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 529778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 53026fbe8dcSKarl Rupp 5314f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5324f7415efSAmlan Barua } 5334f7415efSAmlan Barua if (fout) { 5344f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 535778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 53626fbe8dcSKarl Rupp 5374f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5384f7415efSAmlan Barua } 5394f7415efSAmlan Barua if (bout) { 5404f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 541778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 54226fbe8dcSKarl Rupp 5434f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5444f7415efSAmlan Barua } 5454f7415efSAmlan Barua #endif 5464f7415efSAmlan Barua break; 5474f7415efSAmlan Barua case 3: 5484f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5494f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5504f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5514f7415efSAmlan Barua if (fin) { 5524f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 553778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 55426fbe8dcSKarl Rupp 5554f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5564f7415efSAmlan Barua } 5574f7415efSAmlan Barua if (bout) { 5584f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 559778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 56026fbe8dcSKarl Rupp 5614f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5624f7415efSAmlan Barua } 5633564f093SHong Zhang 56457625b84SAmlan Barua if (fout) { 56557625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 56726fbe8dcSKarl Rupp 56857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56957625b84SAmlan Barua } 5704f7415efSAmlan Barua #else 5710c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5720c9b18e4SAmlan Barua if (fin) { 5730c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 57526fbe8dcSKarl Rupp 5760c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5770c9b18e4SAmlan Barua } 5780c9b18e4SAmlan Barua if (fout) { 5790c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58126fbe8dcSKarl Rupp 5820c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5830c9b18e4SAmlan Barua } 5840c9b18e4SAmlan Barua if (bout) { 5850c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 58726fbe8dcSKarl Rupp 5880c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5890c9b18e4SAmlan Barua } 5904f7415efSAmlan Barua #endif 5914f7415efSAmlan Barua break; 5924f7415efSAmlan Barua default: 5934f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5944f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 59526fbe8dcSKarl Rupp 5964f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 59726fbe8dcSKarl Rupp 5984f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5994f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 60026fbe8dcSKarl Rupp 6014f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 6024f7415efSAmlan Barua 6034f7415efSAmlan Barua if (fin) { 6044f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 605778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 60626fbe8dcSKarl Rupp 6074f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6084f7415efSAmlan Barua } 6094f7415efSAmlan Barua if (bout) { 6104f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 611778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 61226fbe8dcSKarl Rupp 6139496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6144f7415efSAmlan Barua } 6153564f093SHong Zhang 61657625b84SAmlan Barua if (fout) { 61757625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 618778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 61926fbe8dcSKarl Rupp 62057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 62157625b84SAmlan Barua } 6224f7415efSAmlan Barua #else 6230c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6240c9b18e4SAmlan Barua if (fin) { 6250c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 626778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 62726fbe8dcSKarl Rupp 6280c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6290c9b18e4SAmlan Barua } 6300c9b18e4SAmlan Barua if (fout) { 6310c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 632778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 63326fbe8dcSKarl Rupp 6340c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6350c9b18e4SAmlan Barua } 6360c9b18e4SAmlan Barua if (bout) { 6370c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 638778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 63926fbe8dcSKarl Rupp 6400c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6410c9b18e4SAmlan Barua } 6424f7415efSAmlan Barua #endif 6434f7415efSAmlan Barua break; 6444f7415efSAmlan Barua } 6454f7415efSAmlan Barua } 6464f7415efSAmlan Barua PetscFunctionReturn(0); 6474f7415efSAmlan Barua } 6484f7415efSAmlan Barua 649c92418ccSAmlan Barua #undef __FUNCT__ 65074a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6513564f093SHong Zhang /*@ 6523564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 65354efbe56SHong Zhang 6543564f093SHong Zhang Collective on Mat 6553564f093SHong Zhang 6563564f093SHong Zhang Input Parameters: 6573564f093SHong Zhang + A - FFTW matrix 6583564f093SHong Zhang - x - the PETSc vector 6593564f093SHong Zhang 6603564f093SHong Zhang Output Parameters: 6613564f093SHong Zhang . y - the FFTW vector 6623564f093SHong Zhang 663b2b77a04SHong Zhang Options Database Keys: 6643564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 665b2b77a04SHong Zhang 666b2b77a04SHong Zhang Level: intermediate 6673564f093SHong Zhang 66897504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 66997504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 67097504071SAmlan Barua zeros. This routine does that job by scattering operation. 67197504071SAmlan Barua 6723564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6733564f093SHong Zhang @*/ 6743564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6753564f093SHong Zhang { 6763564f093SHong Zhang PetscErrorCode ierr; 677b2b77a04SHong Zhang 6783564f093SHong Zhang PetscFunctionBegin; 6793564f093SHong Zhang ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6803564f093SHong Zhang PetscFunctionReturn(0); 6813564f093SHong Zhang } 6823c3a9c75SAmlan Barua 6833c3a9c75SAmlan Barua #undef __FUNCT__ 6841986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 68574a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6863c3a9c75SAmlan Barua { 6873c3a9c75SAmlan Barua PetscErrorCode ierr; 688ce94432eSBarry Smith MPI_Comm comm; 6893c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6903c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6919496c9aeSAmlan Barua PetscInt N =fft->N; 692b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6939496c9aeSAmlan Barua PetscInt low; 6947a21806cSHong Zhang PetscMPIInt rank,size; 6957a21806cSHong Zhang PetscInt vsize,vsize1; 6967a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6979496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6983c3a9c75SAmlan Barua VecScatter vecscat; 6993c3a9c75SAmlan Barua IS list1,list2; 7009496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7019496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 7029496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 703a31b9140SHong Zhang PetscInt NM; 7049496c9aeSAmlan Barua ptrdiff_t temp; 7059496c9aeSAmlan Barua #endif 7063c3a9c75SAmlan Barua 7073564f093SHong Zhang PetscFunctionBegin; 708ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 709f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 710f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7110298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7123c3a9c75SAmlan Barua 7133564f093SHong Zhang if (size==1) { 7148ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7158ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7169496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7176971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7186971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7196971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7206971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 721b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7223564f093SHong Zhang } else { 7233c3a9c75SAmlan Barua switch (ndim) { 7243c3a9c75SAmlan Barua case 1: 72564657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7267a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 72726fbe8dcSKarl Rupp 7284f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7294f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 73064657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 73164657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 73264657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 73364657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 73464657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 73564657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 73664657d84SAmlan Barua #else 737e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 73864657d84SAmlan Barua #endif 7393c3a9c75SAmlan Barua break; 7403c3a9c75SAmlan Barua case 2: 741bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7427a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 74326fbe8dcSKarl Rupp 7444f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7454f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 746bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 747bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 750bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 751bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 752bd59e6c6SAmlan Barua #else 753c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 75426fbe8dcSKarl Rupp 755854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 756854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7573c3a9c75SAmlan Barua 7583564f093SHong Zhang if (dim[1]%2==0) { 7593c3a9c75SAmlan Barua NM = dim[1]+2; 7603564f093SHong Zhang } else { 7613c3a9c75SAmlan Barua NM = dim[1]+1; 7623564f093SHong Zhang } 7633c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7643c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7653c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7663c3a9c75SAmlan Barua tempindx1 = i*NM + j; 76726fbe8dcSKarl Rupp 7685b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7693c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7703c3a9c75SAmlan Barua } 7713c3a9c75SAmlan Barua } 7723c3a9c75SAmlan Barua 7733c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7743c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7753c3a9c75SAmlan Barua 776f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 777f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 780b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 781b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 782b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 783b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 784bd59e6c6SAmlan Barua #endif 7859496c9aeSAmlan Barua break; 7863c3a9c75SAmlan Barua 7873c3a9c75SAmlan Barua case 3: 788bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7897a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 79026fbe8dcSKarl Rupp 7914f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7924f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 793bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 794bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 797bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 798bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 799bd59e6c6SAmlan Barua #else 800f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 801f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 8027a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 80326fbe8dcSKarl Rupp 804854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 805854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 80651d1eed7SAmlan Barua 807db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 808db4deed7SKarl Rupp else NM = dim[2]+1; 80951d1eed7SAmlan Barua 81051d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 81151d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 81251d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 81351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 81451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 81526fbe8dcSKarl Rupp 81651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 81751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 81851d1eed7SAmlan Barua } 81951d1eed7SAmlan Barua } 82051d1eed7SAmlan Barua } 82151d1eed7SAmlan Barua 82251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 82351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 82451d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 82551d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82651d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 82751d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 828b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 829b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 830b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 831b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 832bd59e6c6SAmlan Barua #endif 8339496c9aeSAmlan Barua break; 8343c3a9c75SAmlan Barua 8353c3a9c75SAmlan Barua default: 836bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8377a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 83826fbe8dcSKarl Rupp 8394f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8404f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 841bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 842bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 845bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 846bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 847bd59e6c6SAmlan Barua #else 848f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 849f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 850e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 85126fbe8dcSKarl Rupp 852e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 85326fbe8dcSKarl Rupp 8547a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 85526fbe8dcSKarl Rupp 856e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 857e5d7f247SAmlan Barua 858e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 859e5d7f247SAmlan Barua 860854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 861854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 862e5d7f247SAmlan Barua 863db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 864db4deed7SKarl Rupp else NM = 1; 865e5d7f247SAmlan Barua 8666971673cSAmlan Barua j = low; 8673564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8686971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8696971673cSAmlan Barua indx2[i] = j; 87026fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8716971673cSAmlan Barua j++; 8726971673cSAmlan Barua } 8736971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8746971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8756971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8766971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8776971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8786971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 879b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 880b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 881b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 882b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 883bd59e6c6SAmlan Barua #endif 8849496c9aeSAmlan Barua break; 8853c3a9c75SAmlan Barua } 886e81bb599SAmlan Barua } 8873564f093SHong Zhang PetscFunctionReturn(0); 8883c3a9c75SAmlan Barua } 8893c3a9c75SAmlan Barua 8903c3a9c75SAmlan Barua #undef __FUNCT__ 89174a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 8923564f093SHong Zhang /*@ 8933564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8943564f093SHong Zhang 8953564f093SHong Zhang Collective on Mat 8963564f093SHong Zhang 8973564f093SHong Zhang Input Parameters: 8983564f093SHong Zhang + A - FFTW matrix 8993564f093SHong Zhang - x - FFTW vector 9003564f093SHong Zhang 9013564f093SHong Zhang Output Parameters: 9023564f093SHong Zhang . y - PETSc vector 9033564f093SHong Zhang 9043564f093SHong Zhang Level: intermediate 9053564f093SHong Zhang 9063564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9073564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9083564f093SHong Zhang 9093564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9103564f093SHong Zhang @*/ 91174a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9123c3a9c75SAmlan Barua { 9133c3a9c75SAmlan Barua PetscErrorCode ierr; 9145fd66863SKarl Rupp 9153c3a9c75SAmlan Barua PetscFunctionBegin; 91674a26884SAmlan Barua ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9173c3a9c75SAmlan Barua PetscFunctionReturn(0); 9183c3a9c75SAmlan Barua } 91954efbe56SHong Zhang 9203c3a9c75SAmlan Barua #undef __FUNCT__ 92174a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 92274a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9235b3e41ffSAmlan Barua { 9245b3e41ffSAmlan Barua PetscErrorCode ierr; 925ce94432eSBarry Smith MPI_Comm comm; 9265b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9275b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9289496c9aeSAmlan Barua PetscInt N = fft->N; 929b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9309496c9aeSAmlan Barua PetscInt low; 9317a21806cSHong Zhang PetscMPIInt rank,size; 9327a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9339496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9345b3e41ffSAmlan Barua VecScatter vecscat; 9355b3e41ffSAmlan Barua IS list1,list2; 9369496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9379496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9389496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 939a31b9140SHong Zhang PetscInt NM; 9409496c9aeSAmlan Barua ptrdiff_t temp; 9419496c9aeSAmlan Barua #endif 9429496c9aeSAmlan Barua 9433564f093SHong Zhang PetscFunctionBegin; 944ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9455b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9465b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9470298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9485b3e41ffSAmlan Barua 949e81bb599SAmlan Barua if (size==1) { 950b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9516971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9526971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9536971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9546971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 955b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 956e81bb599SAmlan Barua 9573564f093SHong Zhang } else { 958e81bb599SAmlan Barua switch (ndim) { 959e81bb599SAmlan Barua case 1: 96064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9617a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 96226fbe8dcSKarl Rupp 9634f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9644f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 96564657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 96664657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96764657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 96864657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 96964657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 97064657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 97164657d84SAmlan Barua #else 9726a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 97364657d84SAmlan Barua #endif 9745b3e41ffSAmlan Barua break; 9755b3e41ffSAmlan Barua case 2: 976bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9777a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 97826fbe8dcSKarl Rupp 9794f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9804f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 981bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 982bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 985bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 986bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 987bd59e6c6SAmlan Barua #else 9887a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 98926fbe8dcSKarl Rupp 990854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 991854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9925b3e41ffSAmlan Barua 993db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 994db4deed7SKarl Rupp else NM = dim[1]+1; 9955b3e41ffSAmlan Barua 9965b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9975b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9985b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9995b3e41ffSAmlan Barua tempindx1 = i*NM + j; 100026fbe8dcSKarl Rupp 10015b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10025b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10035b3e41ffSAmlan Barua } 10045b3e41ffSAmlan Barua } 10055b3e41ffSAmlan Barua 10065b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10075b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10085b3e41ffSAmlan Barua 10095b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10105b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10115b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10125b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1013b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1014b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1015b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1016b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1017bd59e6c6SAmlan Barua #endif 10189496c9aeSAmlan Barua break; 10195b3e41ffSAmlan Barua case 3: 1020bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10217a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 102226fbe8dcSKarl Rupp 10234f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10244f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1025bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1026bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1029bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1030bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1031bd59e6c6SAmlan Barua #else 10327a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 103326fbe8dcSKarl Rupp 1034854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1035854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 103651d1eed7SAmlan Barua 1037db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1038db4deed7SKarl Rupp else NM = dim[2]+1; 103951d1eed7SAmlan Barua 104051d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 104151d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 104251d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 104351d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 104451d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 104526fbe8dcSKarl Rupp 104651d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 104751d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 104851d1eed7SAmlan Barua } 104951d1eed7SAmlan Barua } 105051d1eed7SAmlan Barua } 105151d1eed7SAmlan Barua 105251d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 105351d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 105451d1eed7SAmlan Barua 105551d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 105651d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105751d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 105851d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1062b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1063bd59e6c6SAmlan Barua #endif 10649496c9aeSAmlan Barua break; 10655b3e41ffSAmlan Barua default: 1066bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10677a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 106826fbe8dcSKarl Rupp 10694f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10704f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1071bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1072bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077bd59e6c6SAmlan Barua #else 1078ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 107926fbe8dcSKarl Rupp 1080ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 108126fbe8dcSKarl Rupp 1082c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 108326fbe8dcSKarl Rupp 1084ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1085ba6e06d0SAmlan Barua 1086ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1087ba6e06d0SAmlan Barua 1088854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1089854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1090ba6e06d0SAmlan Barua 1091db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1092db4deed7SKarl Rupp else NM = 1; 1093ba6e06d0SAmlan Barua 1094ba6e06d0SAmlan Barua j = low; 10953564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1096ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1097ba6e06d0SAmlan Barua indx2[i] = j; 10983564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1099ba6e06d0SAmlan Barua j++; 1100ba6e06d0SAmlan Barua } 1101ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1102ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1103ba6e06d0SAmlan Barua 1104ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1105ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1107ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1108b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1109b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1110b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1111b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1112bd59e6c6SAmlan Barua #endif 11139496c9aeSAmlan Barua break; 11145b3e41ffSAmlan Barua } 1115e81bb599SAmlan Barua } 11163564f093SHong Zhang PetscFunctionReturn(0); 11175b3e41ffSAmlan Barua } 11185b3e41ffSAmlan Barua 11195b3e41ffSAmlan Barua #undef __FUNCT__ 11203c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11213c3a9c75SAmlan Barua /* 11223564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11233564f093SHong Zhang 11243c3a9c75SAmlan Barua Options Database Keys: 11253c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11263c3a9c75SAmlan Barua 11273c3a9c75SAmlan Barua Level: intermediate 11283c3a9c75SAmlan Barua 11293c3a9c75SAmlan Barua */ 11308cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1131b2b77a04SHong Zhang { 1132b2b77a04SHong Zhang PetscErrorCode ierr; 1133ce94432eSBarry Smith MPI_Comm comm; 1134b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1135b2b77a04SHong Zhang Mat_FFTW *fftw; 1136b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11375274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11385274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1139b2b77a04SHong Zhang PetscBool flg; 11404f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 114111902ff2SHong Zhang PetscMPIInt size,rank; 11429496c9aeSAmlan Barua ptrdiff_t *pdim; 11439496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11449496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11459496c9aeSAmlan Barua ptrdiff_t temp; 11468ca4c034SAmlan Barua PetscInt N1,tot_dim; 11474f48bc67SAmlan Barua #else 11484f48bc67SAmlan Barua PetscInt n1; 11499496c9aeSAmlan Barua #endif 11509496c9aeSAmlan Barua 1151b2b77a04SHong Zhang PetscFunctionBegin; 1152ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1153b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 115411902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1155c92418ccSAmlan Barua 11561878d478SAmlan Barua fftw_mpi_init(); 115711902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 115811902ff2SHong Zhang pdim[0] = dim[0]; 11598ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11608ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11618ca4c034SAmlan Barua #endif 11623564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11636882372aSHong Zhang partial_dim *= dim[ctr]; 116411902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11658ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1166db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1167db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11688ca4c034SAmlan Barua #endif 11696882372aSHong Zhang } 11703c3a9c75SAmlan Barua 1171b2b77a04SHong Zhang if (size == 1) { 1172e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1173b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1174b2b77a04SHong Zhang n = N; 1175e81bb599SAmlan Barua #else 1176e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1177e81bb599SAmlan Barua n = tot_dim; 1178e81bb599SAmlan Barua #endif 1179e81bb599SAmlan Barua 1180b2b77a04SHong Zhang } else { 11817a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1182b2b77a04SHong Zhang switch (ndim) { 1183b2b77a04SHong Zhang case 1: 11843c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11853c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1186e5d7f247SAmlan Barua #else 11877a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 118826fbe8dcSKarl Rupp 11896882372aSHong Zhang n = (PetscInt)local_n0; 11909496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11919496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1192e5d7f247SAmlan Barua #endif 1193b2b77a04SHong Zhang break; 1194b2b77a04SHong Zhang case 2: 11955b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 11967a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1197b2b77a04SHong Zhang /* 1198b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11990ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1200b2b77a04SHong Zhang */ 1201b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1202b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12035b3e41ffSAmlan Barua #else 1204c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120526fbe8dcSKarl Rupp 12065b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1207c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12085b3e41ffSAmlan Barua #endif 1209b2b77a04SHong Zhang break; 1210b2b77a04SHong Zhang case 3: 121151d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12127a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 121326fbe8dcSKarl Rupp 12146882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12156882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 121651d1eed7SAmlan Barua #else 1217c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 121826fbe8dcSKarl Rupp 121951d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1220c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 122151d1eed7SAmlan Barua #endif 1222b2b77a04SHong Zhang break; 1223b2b77a04SHong Zhang default: 1224b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12257a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 122626fbe8dcSKarl Rupp 12276882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12286882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1229b3a17365SAmlan Barua #else 1230b3a17365SAmlan Barua temp = pdim[ndim-1]; 123126fbe8dcSKarl Rupp 1232b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 123326fbe8dcSKarl Rupp 1234c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 123526fbe8dcSKarl Rupp 1236b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1237b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 123826fbe8dcSKarl Rupp 1239b3a17365SAmlan Barua pdim[ndim-1] = temp; 124026fbe8dcSKarl Rupp 1241c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1242b3a17365SAmlan Barua #endif 1243b2b77a04SHong Zhang break; 1244b2b77a04SHong Zhang } 1245b2b77a04SHong Zhang } 1246b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1247b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1248b2b77a04SHong Zhang fft->data = (void*)fftw; 1249b2b77a04SHong Zhang 1250b2b77a04SHong Zhang fft->n = n; 12510c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1252e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 125326fbe8dcSKarl Rupp 12545e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12558c1d5ab3SHong Zhang if (size == 1) { 1256a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1257a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1258a1b6d50cSHong Zhang #else 12598c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1260a1b6d50cSHong Zhang #endif 12618c1d5ab3SHong Zhang } 126226fbe8dcSKarl Rupp 1263b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1264c92418ccSAmlan Barua 1265b2b77a04SHong Zhang fftw->p_forward = 0; 1266b2b77a04SHong Zhang fftw->p_backward = 0; 1267b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1268b2b77a04SHong Zhang 1269b2b77a04SHong Zhang if (size == 1) { 1270b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1271b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1272b2b77a04SHong Zhang } else { 1273b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1274b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1275b2b77a04SHong Zhang } 1276b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1277b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12784a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 127926fbe8dcSKarl Rupp 12802a7a6963SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1281bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1282bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1283b2b77a04SHong Zhang 1284b2b77a04SHong Zhang /* get runtime options */ 1285ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12865274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12875274ed1bSHong Zhang if (flg) { 12885274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12895274ed1bSHong Zhang } 12904a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1291b2b77a04SHong Zhang PetscFunctionReturn(0); 1292b2b77a04SHong Zhang } 12933c3a9c75SAmlan Barua 12943c3a9c75SAmlan Barua 12953c3a9c75SAmlan Barua 12963c3a9c75SAmlan Barua 1297