1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32b2b77a04SHong Zhang 3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3497504071SAmlan Barua Input parameter: 353564f093SHong Zhang A - the matrix 3697504071SAmlan Barua x - the vector on which FDFT will be performed 3797504071SAmlan Barua 3897504071SAmlan Barua Output parameter: 3997504071SAmlan Barua y - vector that stores result of FDFT 4097504071SAmlan Barua */ 41b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 42b2b77a04SHong Zhang { 43b2b77a04SHong Zhang PetscErrorCode ierr; 44b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 45b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 46f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 47f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 481acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 49a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 50a1b6d50cSHong Zhang fftw_iodim64 *iodims; 51a1b6d50cSHong Zhang #else 528c1d5ab3SHong Zhang fftw_iodim *iodims; 53a1b6d50cSHong Zhang #endif 541acd80e4SHong Zhang PetscInt i; 551acd80e4SHong Zhang #endif 561acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 57b2b77a04SHong Zhang 58b2b77a04SHong Zhang PetscFunctionBegin; 59f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 60b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 61b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 62b2b77a04SHong Zhang switch (ndim) { 63b2b77a04SHong Zhang case 1: 6458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 65b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6658a912c5SAmlan Barua #else 6758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 6858a912c5SAmlan Barua #endif 69b2b77a04SHong Zhang break; 70b2b77a04SHong Zhang case 2: 7158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 72b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7358a912c5SAmlan Barua #else 7458a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7558a912c5SAmlan Barua #endif 76b2b77a04SHong Zhang break; 77b2b77a04SHong Zhang case 3: 7858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 79b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8058a912c5SAmlan Barua #else 8158a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8258a912c5SAmlan Barua #endif 83b2b77a04SHong Zhang break; 84b2b77a04SHong Zhang default: 8558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 86a1b6d50cSHong Zhang iodims = fftw->iodims; 87a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 888c1d5ab3SHong Zhang if (ndim) { 89a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 908c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 918c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 92a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 938c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 948c1d5ab3SHong Zhang } 958c1d5ab3SHong Zhang } 96a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 97a1b6d50cSHong Zhang #else 98a1b6d50cSHong Zhang if (ndim) { 99a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 100a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 101a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 102a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 103a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 104a1b6d50cSHong Zhang } 105a1b6d50cSHong Zhang } 106a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 107a1b6d50cSHong Zhang #endif 108a1b6d50cSHong Zhang 10958a912c5SAmlan Barua #else 110a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11158a912c5SAmlan Barua #endif 112b2b77a04SHong Zhang break; 113b2b77a04SHong Zhang } 114f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 115b2b77a04SHong Zhang fftw->foutarray = y_array; 116b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 117b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 118b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 119b2b77a04SHong Zhang } else { /* use existing plan */ 120b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1217e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 122b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 1237e4bc134SDominic Meiser #else 1247e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 1257e4bc134SDominic Meiser #endif 126b2b77a04SHong Zhang } else { 127b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 128b2b77a04SHong Zhang } 129b2b77a04SHong Zhang } 130b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 131f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 132b2b77a04SHong Zhang PetscFunctionReturn(0); 133b2b77a04SHong Zhang } 134b2b77a04SHong Zhang 13597504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13697504071SAmlan Barua Input parameter: 1373564f093SHong Zhang A - the matrix 13897504071SAmlan Barua x - the vector on which BDFT will be performed 13997504071SAmlan Barua 14097504071SAmlan Barua Output parameter: 14197504071SAmlan Barua y - vector that stores result of BDFT 14297504071SAmlan Barua */ 14397504071SAmlan Barua 144b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 145b2b77a04SHong Zhang { 146b2b77a04SHong Zhang PetscErrorCode ierr; 147b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 148b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 149f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 150f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 151b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1521acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 153a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 154a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 155a1b6d50cSHong Zhang #else 156a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 157a1b6d50cSHong Zhang #endif 1581acd80e4SHong Zhang #endif 159b2b77a04SHong Zhang 160b2b77a04SHong Zhang PetscFunctionBegin; 161f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 162b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 164b2b77a04SHong Zhang switch (ndim) { 165b2b77a04SHong Zhang case 1: 16658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 167b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 16858a912c5SAmlan Barua #else 169e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17058a912c5SAmlan Barua #endif 171b2b77a04SHong Zhang break; 172b2b77a04SHong Zhang case 2: 17358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 174b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17558a912c5SAmlan Barua #else 176e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17758a912c5SAmlan Barua #endif 178b2b77a04SHong Zhang break; 179b2b77a04SHong Zhang case 3: 18058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 181b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18258a912c5SAmlan Barua #else 183e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18458a912c5SAmlan Barua #endif 185b2b77a04SHong Zhang break; 186b2b77a04SHong Zhang default: 18758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 188a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 189a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190a1b6d50cSHong Zhang #else 1918c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 192a1b6d50cSHong Zhang #endif 19358a912c5SAmlan Barua #else 194a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19558a912c5SAmlan Barua #endif 196b2b77a04SHong Zhang break; 197b2b77a04SHong Zhang } 198f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 199b2b77a04SHong Zhang fftw->boutarray = y_array; 200b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 201b2b77a04SHong Zhang } else { /* use existing plan */ 202b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2037e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 204b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 2057e4bc134SDominic Meiser #else 2067e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 2077e4bc134SDominic Meiser #endif 208b2b77a04SHong Zhang } else { 209b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 210b2b77a04SHong Zhang } 211b2b77a04SHong Zhang } 212b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 213f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 214b2b77a04SHong Zhang PetscFunctionReturn(0); 215b2b77a04SHong Zhang } 216b2b77a04SHong Zhang 21797504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 21897504071SAmlan Barua Input parameter: 2193564f093SHong Zhang A - the matrix 22097504071SAmlan Barua x - the vector on which FDFT will be performed 22197504071SAmlan Barua 22297504071SAmlan Barua Output parameter: 22397504071SAmlan Barua y - vector that stores result of FDFT 22497504071SAmlan Barua */ 225b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 226b2b77a04SHong Zhang { 227b2b77a04SHong Zhang PetscErrorCode ierr; 228b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 229b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 230f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 231f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 232c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 233ce94432eSBarry Smith MPI_Comm comm; 234b2b77a04SHong Zhang 235b2b77a04SHong Zhang PetscFunctionBegin; 236ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 237f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 238b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 239b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 240b2b77a04SHong Zhang switch (ndim) { 241b2b77a04SHong Zhang case 1: 2423c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 243b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 244ae0a50aaSHong Zhang #else 2454f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2463c3a9c75SAmlan Barua #endif 247b2b77a04SHong Zhang break; 248b2b77a04SHong Zhang case 2: 24997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 250b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2513c3a9c75SAmlan Barua #else 2523c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2533c3a9c75SAmlan Barua #endif 254b2b77a04SHong Zhang break; 255b2b77a04SHong Zhang case 3: 2563c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 257b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2583c3a9c75SAmlan Barua #else 25951d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2603c3a9c75SAmlan Barua #endif 261b2b77a04SHong Zhang break; 262b2b77a04SHong Zhang default: 2633c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 264c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2653c3a9c75SAmlan Barua #else 2663c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2673c3a9c75SAmlan Barua #endif 268b2b77a04SHong Zhang break; 269b2b77a04SHong Zhang } 270f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 271b2b77a04SHong Zhang fftw->foutarray = y_array; 272b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 273b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 274b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 275b2b77a04SHong Zhang } else { /* use existing plan */ 276b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 277b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 278b2b77a04SHong Zhang } else { 279b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 280b2b77a04SHong Zhang } 281b2b77a04SHong Zhang } 282b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 283f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 284b2b77a04SHong Zhang PetscFunctionReturn(0); 285b2b77a04SHong Zhang } 286b2b77a04SHong Zhang 28797504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 28897504071SAmlan Barua Input parameter: 2893564f093SHong Zhang A - the matrix 29097504071SAmlan Barua x - the vector on which BDFT will be performed 29197504071SAmlan Barua 29297504071SAmlan Barua Output parameter: 29397504071SAmlan Barua y - vector that stores result of BDFT 29497504071SAmlan Barua */ 295b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 296b2b77a04SHong Zhang { 297b2b77a04SHong Zhang PetscErrorCode ierr; 298b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 299b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 300f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 301f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 302c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 303ce94432eSBarry Smith MPI_Comm comm; 304b2b77a04SHong Zhang 305b2b77a04SHong Zhang PetscFunctionBegin; 306ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 307f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 308b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 309b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 310b2b77a04SHong Zhang switch (ndim) { 311b2b77a04SHong Zhang case 1: 3123c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 313b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 314ae0a50aaSHong Zhang #else 3154f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3163c3a9c75SAmlan Barua #endif 317b2b77a04SHong Zhang break; 318b2b77a04SHong Zhang case 2: 31997504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 320b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3213c3a9c75SAmlan Barua #else 3223c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3233c3a9c75SAmlan Barua #endif 324b2b77a04SHong Zhang break; 325b2b77a04SHong Zhang case 3: 3263c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 327b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3283c3a9c75SAmlan Barua #else 3293c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3303c3a9c75SAmlan Barua #endif 331b2b77a04SHong Zhang break; 332b2b77a04SHong Zhang default: 3333c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 334c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3353c3a9c75SAmlan Barua #else 3363c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3373c3a9c75SAmlan Barua #endif 338b2b77a04SHong Zhang break; 339b2b77a04SHong Zhang } 340f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 341b2b77a04SHong Zhang fftw->boutarray = y_array; 342b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 343b2b77a04SHong Zhang } else { /* use existing plan */ 344b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 345b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 346b2b77a04SHong Zhang } else { 347b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 348b2b77a04SHong Zhang } 349b2b77a04SHong Zhang } 350b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 351f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 352b2b77a04SHong Zhang PetscFunctionReturn(0); 353b2b77a04SHong Zhang } 354b2b77a04SHong Zhang 355b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 356b2b77a04SHong Zhang { 357b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 358b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 359b2b77a04SHong Zhang PetscErrorCode ierr; 360b2b77a04SHong Zhang 361b2b77a04SHong Zhang PetscFunctionBegin; 362b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 363b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3648c1d5ab3SHong Zhang if (fftw->iodims) { 3658c1d5ab3SHong Zhang free(fftw->iodims); 3668c1d5ab3SHong Zhang } 367bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 368bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3696ccf0b3eSHong Zhang fftw_mpi_cleanup(); 370b2b77a04SHong Zhang PetscFunctionReturn(0); 371b2b77a04SHong Zhang } 372b2b77a04SHong Zhang 373c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 374b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 375b2b77a04SHong Zhang { 376b2b77a04SHong Zhang PetscErrorCode ierr; 377b2b77a04SHong Zhang PetscScalar *array; 378b2b77a04SHong Zhang 379b2b77a04SHong Zhang PetscFunctionBegin; 380b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 381b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 382b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 383b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 384b2b77a04SHong Zhang PetscFunctionReturn(0); 385b2b77a04SHong Zhang } 386b2b77a04SHong Zhang 3874be45526SHong Zhang /*@ 3882a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 3894f7415efSAmlan Barua parallel layout determined by FFTW 3904f7415efSAmlan Barua 3914f7415efSAmlan Barua Collective on Mat 3924f7415efSAmlan Barua 3934f7415efSAmlan Barua Input Parameter: 3943564f093SHong Zhang . A - the matrix 3954f7415efSAmlan Barua 3964f7415efSAmlan Barua Output Parameter: 397cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 398cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 399cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4004f7415efSAmlan Barua 4014f7415efSAmlan Barua Level: advanced 4023564f093SHong Zhang 40397504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 40497504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 40597504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 40697504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 40797504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 40897504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 409e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 410e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 411e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 412e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 413e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 414e0ec536eSAmlan Barua each processor and returns that. 4154f7415efSAmlan Barua 4164f7415efSAmlan Barua .seealso: MatCreateFFTW() 4174be45526SHong Zhang @*/ 4182a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4194be45526SHong Zhang { 4204be45526SHong Zhang PetscErrorCode ierr; 421b4c3921fSHong Zhang 4224be45526SHong Zhang PetscFunctionBegin; 423163d334eSBarry Smith ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4244be45526SHong Zhang PetscFunctionReturn(0); 4254be45526SHong Zhang } 4264be45526SHong Zhang 4272a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4284f7415efSAmlan Barua { 4294f7415efSAmlan Barua PetscErrorCode ierr; 4304f7415efSAmlan Barua PetscMPIInt size,rank; 431ce94432eSBarry Smith MPI_Comm comm; 4324f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4334f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4349496c9aeSAmlan Barua PetscInt N = fft->N; 4354f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4364f7415efSAmlan Barua 4374f7415efSAmlan Barua PetscFunctionBegin; 4384f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4394f7415efSAmlan Barua PetscValidType(A,1); 440ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4414f7415efSAmlan Barua 4424f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4434f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4444f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4454f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4464f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4474f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4484f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4494f7415efSAmlan Barua #else 4508ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4518ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4528ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4534f7415efSAmlan Barua #endif 45497504071SAmlan Barua } else { /* parallel cases */ 4554f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4569496c9aeSAmlan Barua ptrdiff_t local_n1; 4579496c9aeSAmlan Barua fftw_complex *data_fout; 4589496c9aeSAmlan Barua ptrdiff_t local_1_start; 4599496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4609496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4619496c9aeSAmlan Barua #else 4624f7415efSAmlan Barua double *data_finr,*data_boutr; 4636ccf0b3eSHong Zhang PetscInt n1,N1; 4649496c9aeSAmlan Barua ptrdiff_t temp; 4659496c9aeSAmlan Barua #endif 4669496c9aeSAmlan Barua 4674f7415efSAmlan Barua switch (ndim) { 4684f7415efSAmlan Barua case 1: 46957625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4704f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 47157625b84SAmlan Barua #else 47257625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 47357625b84SAmlan Barua if (fin) { 47457625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 475778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 47626fbe8dcSKarl Rupp 47757625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 47857625b84SAmlan Barua } 47957625b84SAmlan Barua if (fout) { 48057625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 481778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 48226fbe8dcSKarl Rupp 48357625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 48457625b84SAmlan Barua } 48557625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 48657625b84SAmlan Barua if (bout) { 48757625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 488778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 48926fbe8dcSKarl Rupp 49057625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 49157625b84SAmlan Barua } 49257625b84SAmlan Barua break; 49357625b84SAmlan Barua #endif 4944f7415efSAmlan Barua case 2: 49597504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 4964f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 4974f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 4984f7415efSAmlan Barua if (fin) { 4994f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 500778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 50126fbe8dcSKarl Rupp 5024f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5034f7415efSAmlan Barua } 5044f7415efSAmlan Barua if (bout) { 5054f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 506778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 50726fbe8dcSKarl Rupp 5084f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5094f7415efSAmlan Barua } 51057625b84SAmlan Barua if (fout) { 51157625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 51326fbe8dcSKarl Rupp 51457625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 51557625b84SAmlan Barua } 5164f7415efSAmlan Barua #else 5174f7415efSAmlan Barua /* Get local size */ 5184f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5194f7415efSAmlan Barua if (fin) { 5204f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 52226fbe8dcSKarl Rupp 5234f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5244f7415efSAmlan Barua } 5254f7415efSAmlan Barua if (fout) { 5264f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 527778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52826fbe8dcSKarl Rupp 5294f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5304f7415efSAmlan Barua } 5314f7415efSAmlan Barua if (bout) { 5324f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 53426fbe8dcSKarl Rupp 5354f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5364f7415efSAmlan Barua } 5374f7415efSAmlan Barua #endif 5384f7415efSAmlan Barua break; 5394f7415efSAmlan Barua case 3: 5404f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5414f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5424f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5434f7415efSAmlan Barua if (fin) { 5444f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 545778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 54626fbe8dcSKarl Rupp 5474f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5484f7415efSAmlan Barua } 5494f7415efSAmlan Barua if (bout) { 5504f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 551778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 55226fbe8dcSKarl Rupp 5534f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5544f7415efSAmlan Barua } 5553564f093SHong Zhang 55657625b84SAmlan Barua if (fout) { 55757625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 558778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 55926fbe8dcSKarl Rupp 56057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 56157625b84SAmlan Barua } 5624f7415efSAmlan Barua #else 5630c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5640c9b18e4SAmlan Barua if (fin) { 5650c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 56726fbe8dcSKarl Rupp 5680c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5690c9b18e4SAmlan Barua } 5700c9b18e4SAmlan Barua if (fout) { 5710c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 572778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57326fbe8dcSKarl Rupp 5740c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5750c9b18e4SAmlan Barua } 5760c9b18e4SAmlan Barua if (bout) { 5770c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 578778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 57926fbe8dcSKarl Rupp 5800c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5810c9b18e4SAmlan Barua } 5824f7415efSAmlan Barua #endif 5834f7415efSAmlan Barua break; 5844f7415efSAmlan Barua default: 5854f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5864f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 58726fbe8dcSKarl Rupp 5884f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 58926fbe8dcSKarl Rupp 5904f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5914f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 59226fbe8dcSKarl Rupp 5934f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 5944f7415efSAmlan Barua 5954f7415efSAmlan Barua if (fin) { 5964f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 597778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 59826fbe8dcSKarl Rupp 5994f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6004f7415efSAmlan Barua } 6014f7415efSAmlan Barua if (bout) { 6024f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 603778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 60426fbe8dcSKarl Rupp 6059496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6064f7415efSAmlan Barua } 6073564f093SHong Zhang 60857625b84SAmlan Barua if (fout) { 60957625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 610778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 61126fbe8dcSKarl Rupp 61257625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 61357625b84SAmlan Barua } 6144f7415efSAmlan Barua #else 6150c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6160c9b18e4SAmlan Barua if (fin) { 6170c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 618778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 61926fbe8dcSKarl Rupp 6200c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6210c9b18e4SAmlan Barua } 6220c9b18e4SAmlan Barua if (fout) { 6230c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 624778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 62526fbe8dcSKarl Rupp 6260c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6270c9b18e4SAmlan Barua } 6280c9b18e4SAmlan Barua if (bout) { 6290c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 630778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 63126fbe8dcSKarl Rupp 6320c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6330c9b18e4SAmlan Barua } 6344f7415efSAmlan Barua #endif 6354f7415efSAmlan Barua break; 6364f7415efSAmlan Barua } 6374f7415efSAmlan Barua } 6384f7415efSAmlan Barua PetscFunctionReturn(0); 6394f7415efSAmlan Barua } 6404f7415efSAmlan Barua 6413564f093SHong Zhang /*@ 6423564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 64354efbe56SHong Zhang 6443564f093SHong Zhang Collective on Mat 6453564f093SHong Zhang 6463564f093SHong Zhang Input Parameters: 6473564f093SHong Zhang + A - FFTW matrix 6483564f093SHong Zhang - x - the PETSc vector 6493564f093SHong Zhang 6503564f093SHong Zhang Output Parameters: 6513564f093SHong Zhang . y - the FFTW vector 6523564f093SHong Zhang 653b2b77a04SHong Zhang Options Database Keys: 6543564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 655b2b77a04SHong Zhang 656b2b77a04SHong Zhang Level: intermediate 6573564f093SHong Zhang 65897504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 65997504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 66097504071SAmlan Barua zeros. This routine does that job by scattering operation. 66197504071SAmlan Barua 6623564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6633564f093SHong Zhang @*/ 6643564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6653564f093SHong Zhang { 6663564f093SHong Zhang PetscErrorCode ierr; 667b2b77a04SHong Zhang 6683564f093SHong Zhang PetscFunctionBegin; 669163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6703564f093SHong Zhang PetscFunctionReturn(0); 6713564f093SHong Zhang } 6723c3a9c75SAmlan Barua 67374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6743c3a9c75SAmlan Barua { 6753c3a9c75SAmlan Barua PetscErrorCode ierr; 676ce94432eSBarry Smith MPI_Comm comm; 6773c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6783c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6799496c9aeSAmlan Barua PetscInt N =fft->N; 680b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 6819496c9aeSAmlan Barua PetscInt low; 6827a21806cSHong Zhang PetscMPIInt rank,size; 6837a21806cSHong Zhang PetscInt vsize,vsize1; 6847a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 6859496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 6863c3a9c75SAmlan Barua VecScatter vecscat; 6873c3a9c75SAmlan Barua IS list1,list2; 6889496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6899496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 6909496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 691a31b9140SHong Zhang PetscInt NM; 6929496c9aeSAmlan Barua ptrdiff_t temp; 6939496c9aeSAmlan Barua #endif 6943c3a9c75SAmlan Barua 6953564f093SHong Zhang PetscFunctionBegin; 696ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 697f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 698f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 6990298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7003c3a9c75SAmlan Barua 7013564f093SHong Zhang if (size==1) { 7028ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7038ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7049496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 705*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7066971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7076971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7086971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 709b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7103564f093SHong Zhang } else { 7113c3a9c75SAmlan Barua switch (ndim) { 7123c3a9c75SAmlan Barua case 1: 71364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7147a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 71526fbe8dcSKarl Rupp 7164f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7174f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 718*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 71964657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72064657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 72164657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 72264657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 72364657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 72464657d84SAmlan Barua #else 725e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 72664657d84SAmlan Barua #endif 7273c3a9c75SAmlan Barua break; 7283c3a9c75SAmlan Barua case 2: 729bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7307a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 73126fbe8dcSKarl Rupp 7324f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7334f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 734*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 735bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 736bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 737bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 738bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 739bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 740bd59e6c6SAmlan Barua #else 741c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 74226fbe8dcSKarl Rupp 743854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 744854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7453c3a9c75SAmlan Barua 7463564f093SHong Zhang if (dim[1]%2==0) { 7473c3a9c75SAmlan Barua NM = dim[1]+2; 7483564f093SHong Zhang } else { 7493c3a9c75SAmlan Barua NM = dim[1]+1; 7503564f093SHong Zhang } 7513c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7523c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7533c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7543c3a9c75SAmlan Barua tempindx1 = i*NM + j; 75526fbe8dcSKarl Rupp 7565b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7573c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7583c3a9c75SAmlan Barua } 7593c3a9c75SAmlan Barua } 7603c3a9c75SAmlan Barua 7613c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7623c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7633c3a9c75SAmlan Barua 764*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 765f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 766f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 768b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 769b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 770b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 771b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 772bd59e6c6SAmlan Barua #endif 7739496c9aeSAmlan Barua break; 7743c3a9c75SAmlan Barua 7753c3a9c75SAmlan Barua case 3: 776bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7777a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 77826fbe8dcSKarl Rupp 7794f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 7804f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 781*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 782bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 783bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 784bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 785bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 786bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 787bd59e6c6SAmlan Barua #else 788f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 789f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 7907a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 79126fbe8dcSKarl Rupp 792854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 793854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 79451d1eed7SAmlan Barua 795db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 796db4deed7SKarl Rupp else NM = dim[2]+1; 79751d1eed7SAmlan Barua 79851d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 79951d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 80051d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 80151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 80251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 80326fbe8dcSKarl Rupp 80451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 80551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 80651d1eed7SAmlan Barua } 80751d1eed7SAmlan Barua } 80851d1eed7SAmlan Barua } 80951d1eed7SAmlan Barua 81051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 81151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 812*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 81351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 81551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 816b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 817b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 818b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 819b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 820bd59e6c6SAmlan Barua #endif 8219496c9aeSAmlan Barua break; 8223c3a9c75SAmlan Barua 8233c3a9c75SAmlan Barua default: 824bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8257a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 82626fbe8dcSKarl Rupp 8274f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8284f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 829*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 830bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 833bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 834bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 835bd59e6c6SAmlan Barua #else 836f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 837f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 838e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 83926fbe8dcSKarl Rupp 840e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 84126fbe8dcSKarl Rupp 8427a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 84326fbe8dcSKarl Rupp 844e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 845e5d7f247SAmlan Barua 846e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 847e5d7f247SAmlan Barua 848854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 849854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 850e5d7f247SAmlan Barua 851db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 852db4deed7SKarl Rupp else NM = 1; 853e5d7f247SAmlan Barua 8546971673cSAmlan Barua j = low; 8553564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8566971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8576971673cSAmlan Barua indx2[i] = j; 85826fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8596971673cSAmlan Barua j++; 8606971673cSAmlan Barua } 8616971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8626971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 863*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8646971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8656971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8666971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 867b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 868b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 869b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 870b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 871bd59e6c6SAmlan Barua #endif 8729496c9aeSAmlan Barua break; 8733c3a9c75SAmlan Barua } 874e81bb599SAmlan Barua } 8753564f093SHong Zhang PetscFunctionReturn(0); 8763c3a9c75SAmlan Barua } 8773c3a9c75SAmlan Barua 8783564f093SHong Zhang /*@ 8793564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 8803564f093SHong Zhang 8813564f093SHong Zhang Collective on Mat 8823564f093SHong Zhang 8833564f093SHong Zhang Input Parameters: 8843564f093SHong Zhang + A - FFTW matrix 8853564f093SHong Zhang - x - FFTW vector 8863564f093SHong Zhang 8873564f093SHong Zhang Output Parameters: 8883564f093SHong Zhang . y - PETSc vector 8893564f093SHong Zhang 8903564f093SHong Zhang Level: intermediate 8913564f093SHong Zhang 8923564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 8933564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 8943564f093SHong Zhang 8953564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 8963564f093SHong Zhang @*/ 89774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 8983c3a9c75SAmlan Barua { 8993c3a9c75SAmlan Barua PetscErrorCode ierr; 9005fd66863SKarl Rupp 9013c3a9c75SAmlan Barua PetscFunctionBegin; 902163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9033c3a9c75SAmlan Barua PetscFunctionReturn(0); 9043c3a9c75SAmlan Barua } 90554efbe56SHong Zhang 90674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9075b3e41ffSAmlan Barua { 9085b3e41ffSAmlan Barua PetscErrorCode ierr; 909ce94432eSBarry Smith MPI_Comm comm; 9105b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9115b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9129496c9aeSAmlan Barua PetscInt N = fft->N; 913b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9149496c9aeSAmlan Barua PetscInt low; 9157a21806cSHong Zhang PetscMPIInt rank,size; 9167a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9179496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9185b3e41ffSAmlan Barua VecScatter vecscat; 9195b3e41ffSAmlan Barua IS list1,list2; 9209496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9219496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9229496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 923a31b9140SHong Zhang PetscInt NM; 9249496c9aeSAmlan Barua ptrdiff_t temp; 9259496c9aeSAmlan Barua #endif 9269496c9aeSAmlan Barua 9273564f093SHong Zhang PetscFunctionBegin; 928ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9295b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9305b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9310298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9325b3e41ffSAmlan Barua 933e81bb599SAmlan Barua if (size==1) { 934b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 935*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9366971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9376971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9386971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 939b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 940e81bb599SAmlan Barua 9413564f093SHong Zhang } else { 942e81bb599SAmlan Barua switch (ndim) { 943e81bb599SAmlan Barua case 1: 94464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9457a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 94626fbe8dcSKarl Rupp 9474f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9484f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 949*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 95064657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95164657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 95264657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 95364657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 95464657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 95564657d84SAmlan Barua #else 9566a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 95764657d84SAmlan Barua #endif 9585b3e41ffSAmlan Barua break; 9595b3e41ffSAmlan Barua case 2: 960bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9617a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 96226fbe8dcSKarl Rupp 9634f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9644f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 965*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 966bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 969bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 970bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 971bd59e6c6SAmlan Barua #else 9727a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 97326fbe8dcSKarl Rupp 974854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 975854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 9765b3e41ffSAmlan Barua 977db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 978db4deed7SKarl Rupp else NM = dim[1]+1; 9795b3e41ffSAmlan Barua 9805b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 9815b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 9825b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 9835b3e41ffSAmlan Barua tempindx1 = i*NM + j; 98426fbe8dcSKarl Rupp 9855b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 9865b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 9875b3e41ffSAmlan Barua } 9885b3e41ffSAmlan Barua } 9895b3e41ffSAmlan Barua 9905b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 9915b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 9925b3e41ffSAmlan Barua 993*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 9945b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9955b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9965b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 997b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 998b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 999b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1000b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1001bd59e6c6SAmlan Barua #endif 10029496c9aeSAmlan Barua break; 10035b3e41ffSAmlan Barua case 3: 1004bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10057a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 100626fbe8dcSKarl Rupp 10074f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10084f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1009*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1010bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1013bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1014bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1015bd59e6c6SAmlan Barua #else 10167a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 101726fbe8dcSKarl Rupp 1018854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1019854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 102051d1eed7SAmlan Barua 1021db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1022db4deed7SKarl Rupp else NM = dim[2]+1; 102351d1eed7SAmlan Barua 102451d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 102551d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 102651d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 102751d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 102851d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 102926fbe8dcSKarl Rupp 103051d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 103151d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 103251d1eed7SAmlan Barua } 103351d1eed7SAmlan Barua } 103451d1eed7SAmlan Barua } 103551d1eed7SAmlan Barua 103651d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 103751d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 103851d1eed7SAmlan Barua 1039*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 104051d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104151d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 104251d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1043b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1044b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1045b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1046b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1047bd59e6c6SAmlan Barua #endif 10489496c9aeSAmlan Barua break; 10495b3e41ffSAmlan Barua default: 1050bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10517a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 105226fbe8dcSKarl Rupp 10534f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10544f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1055*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1056bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061bd59e6c6SAmlan Barua #else 1062ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 106326fbe8dcSKarl Rupp 1064ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 106526fbe8dcSKarl Rupp 1066c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 106726fbe8dcSKarl Rupp 1068ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1069ba6e06d0SAmlan Barua 1070ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1071ba6e06d0SAmlan Barua 1072854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1073854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1074ba6e06d0SAmlan Barua 1075db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1076db4deed7SKarl Rupp else NM = 1; 1077ba6e06d0SAmlan Barua 1078ba6e06d0SAmlan Barua j = low; 10793564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1080ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1081ba6e06d0SAmlan Barua indx2[i] = j; 10823564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1083ba6e06d0SAmlan Barua j++; 1084ba6e06d0SAmlan Barua } 1085ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1086ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1087ba6e06d0SAmlan Barua 1088*9448b7f1SJunchao Zhang ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1089ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1092b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1093b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1094b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1095b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1096bd59e6c6SAmlan Barua #endif 10979496c9aeSAmlan Barua break; 10985b3e41ffSAmlan Barua } 1099e81bb599SAmlan Barua } 11003564f093SHong Zhang PetscFunctionReturn(0); 11015b3e41ffSAmlan Barua } 11025b3e41ffSAmlan Barua 11033c3a9c75SAmlan Barua /* 11043564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11053564f093SHong Zhang 11063c3a9c75SAmlan Barua Options Database Keys: 11073c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11083c3a9c75SAmlan Barua 11093c3a9c75SAmlan Barua Level: intermediate 11103c3a9c75SAmlan Barua 11113c3a9c75SAmlan Barua */ 11128cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1113b2b77a04SHong Zhang { 1114b2b77a04SHong Zhang PetscErrorCode ierr; 1115ce94432eSBarry Smith MPI_Comm comm; 1116b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1117b2b77a04SHong Zhang Mat_FFTW *fftw; 1118b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11195274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11205274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1121b2b77a04SHong Zhang PetscBool flg; 11224f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 112311902ff2SHong Zhang PetscMPIInt size,rank; 11249496c9aeSAmlan Barua ptrdiff_t *pdim; 11259496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11269496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11279496c9aeSAmlan Barua ptrdiff_t temp; 11288ca4c034SAmlan Barua PetscInt N1,tot_dim; 11294f48bc67SAmlan Barua #else 11304f48bc67SAmlan Barua PetscInt n1; 11319496c9aeSAmlan Barua #endif 11329496c9aeSAmlan Barua 1133b2b77a04SHong Zhang PetscFunctionBegin; 1134ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1135b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 113611902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1137c92418ccSAmlan Barua 11381878d478SAmlan Barua fftw_mpi_init(); 113911902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 114011902ff2SHong Zhang pdim[0] = dim[0]; 11418ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11428ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11438ca4c034SAmlan Barua #endif 11443564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11456882372aSHong Zhang partial_dim *= dim[ctr]; 114611902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11478ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1148db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1149db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11508ca4c034SAmlan Barua #endif 11516882372aSHong Zhang } 11523c3a9c75SAmlan Barua 1153b2b77a04SHong Zhang if (size == 1) { 1154e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1155b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1156b2b77a04SHong Zhang n = N; 1157e81bb599SAmlan Barua #else 1158e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1159e81bb599SAmlan Barua n = tot_dim; 1160e81bb599SAmlan Barua #endif 1161e81bb599SAmlan Barua 1162b2b77a04SHong Zhang } else { 11637a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1164b2b77a04SHong Zhang switch (ndim) { 1165b2b77a04SHong Zhang case 1: 11663c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11673c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1168e5d7f247SAmlan Barua #else 11697a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 117026fbe8dcSKarl Rupp 11716882372aSHong Zhang n = (PetscInt)local_n0; 11729496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11739496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1174e5d7f247SAmlan Barua #endif 1175b2b77a04SHong Zhang break; 1176b2b77a04SHong Zhang case 2: 11775b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 11787a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1179b2b77a04SHong Zhang /* 1180b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 11810ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1182b2b77a04SHong Zhang */ 1183b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1184b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 11855b3e41ffSAmlan Barua #else 1186c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 118726fbe8dcSKarl Rupp 11885b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1189c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 11905b3e41ffSAmlan Barua #endif 1191b2b77a04SHong Zhang break; 1192b2b77a04SHong Zhang case 3: 119351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 11947a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 119526fbe8dcSKarl Rupp 11966882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 11976882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 119851d1eed7SAmlan Barua #else 1199c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 120026fbe8dcSKarl Rupp 120151d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1202c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 120351d1eed7SAmlan Barua #endif 1204b2b77a04SHong Zhang break; 1205b2b77a04SHong Zhang default: 1206b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12077a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 120826fbe8dcSKarl Rupp 12096882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12106882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1211b3a17365SAmlan Barua #else 1212b3a17365SAmlan Barua temp = pdim[ndim-1]; 121326fbe8dcSKarl Rupp 1214b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 121526fbe8dcSKarl Rupp 1216c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 121726fbe8dcSKarl Rupp 1218b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1219b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 122026fbe8dcSKarl Rupp 1221b3a17365SAmlan Barua pdim[ndim-1] = temp; 122226fbe8dcSKarl Rupp 1223c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1224b3a17365SAmlan Barua #endif 1225b2b77a04SHong Zhang break; 1226b2b77a04SHong Zhang } 1227b2b77a04SHong Zhang } 12282277172eSMark Adams free(pdim); 1229b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1230b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1231b2b77a04SHong Zhang fft->data = (void*)fftw; 1232b2b77a04SHong Zhang 1233b2b77a04SHong Zhang fft->n = n; 12340c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1235e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 123626fbe8dcSKarl Rupp 12375e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12388c1d5ab3SHong Zhang if (size == 1) { 1239a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1240a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1241a1b6d50cSHong Zhang #else 12428c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1243a1b6d50cSHong Zhang #endif 12448c1d5ab3SHong Zhang } 124526fbe8dcSKarl Rupp 1246b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1247c92418ccSAmlan Barua 1248b2b77a04SHong Zhang fftw->p_forward = 0; 1249b2b77a04SHong Zhang fftw->p_backward = 0; 1250b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1251b2b77a04SHong Zhang 1252b2b77a04SHong Zhang if (size == 1) { 1253b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1254b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1255b2b77a04SHong Zhang } else { 1256b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1257b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1258b2b77a04SHong Zhang } 1259b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1260b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12614a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 126226fbe8dcSKarl Rupp 12632a7a6963SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1264bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1265bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1266b2b77a04SHong Zhang 1267b2b77a04SHong Zhang /* get runtime options */ 1268ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12695274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12705274ed1bSHong Zhang if (flg) { 12715274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12725274ed1bSHong Zhang } 12734a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1274b2b77a04SHong Zhang PetscFunctionReturn(0); 1275b2b77a04SHong Zhang } 12763c3a9c75SAmlan Barua 12773c3a9c75SAmlan Barua 12783c3a9c75SAmlan Barua 12793c3a9c75SAmlan Barua 1280