1b2b77a04SHong Zhang 2b2b77a04SHong Zhang /* 3b2b77a04SHong Zhang Provides an interface to the FFTW package. 4b2b77a04SHong Zhang Testing examples can be found in ~src/mat/examples/tests 5b2b77a04SHong Zhang */ 6b2b77a04SHong Zhang 7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8b2b77a04SHong Zhang EXTERN_C_BEGIN 9c6db04a5SJed Brown #include <fftw3-mpi.h> 10b2b77a04SHong Zhang EXTERN_C_END 11b2b77a04SHong Zhang 12b2b77a04SHong Zhang typedef struct { 13b9ae062cSHong Zhang ptrdiff_t ndim_fftw,*dim_fftw; 14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 15a1b6d50cSHong Zhang fftw_iodim64 *iodims; 16a1b6d50cSHong Zhang #else 178c1d5ab3SHong Zhang fftw_iodim *iodims; 18a1b6d50cSHong Zhang #endif 19e5d7f247SAmlan Barua PetscInt partial_dim; 20b2b77a04SHong Zhang fftw_plan p_forward,p_backward; 21b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22b2b77a04SHong Zhang PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23b2b77a04SHong Zhang executed for the arrays with which the plan was created */ 24b2b77a04SHong Zhang } Mat_FFTW; 25b2b77a04SHong Zhang 26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat); 31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32b2b77a04SHong Zhang 3397504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel 3497504071SAmlan Barua Input parameter: 353564f093SHong Zhang A - the matrix 3697504071SAmlan Barua x - the vector on which FDFT will be performed 3797504071SAmlan Barua 3897504071SAmlan Barua Output parameter: 3997504071SAmlan Barua y - vector that stores result of FDFT 4097504071SAmlan Barua */ 41b2b77a04SHong Zhang #undef __FUNCT__ 42b2b77a04SHong Zhang #define __FUNCT__ "MatMult_SeqFFTW" 43b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44b2b77a04SHong Zhang { 45b2b77a04SHong Zhang PetscErrorCode ierr; 46b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 47b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 49f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 501acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 51a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 52a1b6d50cSHong Zhang fftw_iodim64 *iodims; 53a1b6d50cSHong Zhang #else 548c1d5ab3SHong Zhang fftw_iodim *iodims; 55a1b6d50cSHong Zhang #endif 561acd80e4SHong Zhang PetscInt i; 571acd80e4SHong Zhang #endif 581acd80e4SHong Zhang PetscInt ndim = fft->ndim,*dim = fft->dim; 59b2b77a04SHong Zhang 60b2b77a04SHong Zhang PetscFunctionBegin; 61f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 62b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 63b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 64b2b77a04SHong Zhang switch (ndim) { 65b2b77a04SHong Zhang case 1: 6658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 67b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 6858a912c5SAmlan Barua #else 6958a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7058a912c5SAmlan Barua #endif 71b2b77a04SHong Zhang break; 72b2b77a04SHong Zhang case 2: 7358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 74b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 7558a912c5SAmlan Barua #else 7658a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 7758a912c5SAmlan Barua #endif 78b2b77a04SHong Zhang break; 79b2b77a04SHong Zhang case 3: 8058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 81b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 8258a912c5SAmlan Barua #else 8358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 8458a912c5SAmlan Barua #endif 85b2b77a04SHong Zhang break; 86b2b77a04SHong Zhang default: 8758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 88a1b6d50cSHong Zhang iodims = fftw->iodims; 89a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 908c1d5ab3SHong Zhang if (ndim) { 91a1b6d50cSHong Zhang iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 928c1d5ab3SHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 938c1d5ab3SHong Zhang for (i=ndim-2; i>=0; --i) { 94a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i]; 958c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 968c1d5ab3SHong Zhang } 978c1d5ab3SHong Zhang } 98a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 99a1b6d50cSHong Zhang #else 100a1b6d50cSHong Zhang if (ndim) { 101a1b6d50cSHong Zhang iodims[ndim-1].n = (int)dim[ndim-1]; 102a1b6d50cSHong Zhang iodims[ndim-1].is = iodims[ndim-1].os = 1; 103a1b6d50cSHong Zhang for (i=ndim-2; i>=0; --i) { 104a1b6d50cSHong Zhang iodims[i].n = (int)dim[i]; 105a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 106a1b6d50cSHong Zhang } 107a1b6d50cSHong Zhang } 108a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 109a1b6d50cSHong Zhang #endif 110a1b6d50cSHong Zhang 11158a912c5SAmlan Barua #else 112a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 11358a912c5SAmlan Barua #endif 114b2b77a04SHong Zhang break; 115b2b77a04SHong Zhang } 116f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 117b2b77a04SHong Zhang fftw->foutarray = y_array; 118b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 119b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 120b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 121b2b77a04SHong Zhang } else { /* use existing plan */ 122b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 1237e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 124b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 1257e4bc134SDominic Meiser #else 1267e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 1277e4bc134SDominic Meiser #endif 128b2b77a04SHong Zhang } else { 129b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 130b2b77a04SHong Zhang } 131b2b77a04SHong Zhang } 132b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 133f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 134b2b77a04SHong Zhang PetscFunctionReturn(0); 135b2b77a04SHong Zhang } 136b2b77a04SHong Zhang 13797504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT 13897504071SAmlan Barua Input parameter: 1393564f093SHong Zhang A - the matrix 14097504071SAmlan Barua x - the vector on which BDFT will be performed 14197504071SAmlan Barua 14297504071SAmlan Barua Output parameter: 14397504071SAmlan Barua y - vector that stores result of BDFT 14497504071SAmlan Barua */ 14597504071SAmlan Barua 146b2b77a04SHong Zhang #undef __FUNCT__ 147b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_SeqFFTW" 148b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 149b2b77a04SHong Zhang { 150b2b77a04SHong Zhang PetscErrorCode ierr; 151b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 152b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 153f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 154f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 155b2b77a04SHong Zhang PetscInt ndim=fft->ndim,*dim=fft->dim; 1561acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX) 157a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 158a1b6d50cSHong Zhang fftw_iodim64 *iodims=fftw->iodims; 159a1b6d50cSHong Zhang #else 160a1b6d50cSHong Zhang fftw_iodim *iodims=fftw->iodims; 161a1b6d50cSHong Zhang #endif 1621acd80e4SHong Zhang #endif 163b2b77a04SHong Zhang 164b2b77a04SHong Zhang PetscFunctionBegin; 165f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 166b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 167b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 168b2b77a04SHong Zhang switch (ndim) { 169b2b77a04SHong Zhang case 1: 17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 171b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17258a912c5SAmlan Barua #else 173e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 17458a912c5SAmlan Barua #endif 175b2b77a04SHong Zhang break; 176b2b77a04SHong Zhang case 2: 17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 178b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 17958a912c5SAmlan Barua #else 180e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18158a912c5SAmlan Barua #endif 182b2b77a04SHong Zhang break; 183b2b77a04SHong Zhang case 3: 18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 185b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 18658a912c5SAmlan Barua #else 187e81bb599SAmlan Barua fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 18858a912c5SAmlan Barua #endif 189b2b77a04SHong Zhang break; 190b2b77a04SHong Zhang default: 19158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX) 192a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 193a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 194a1b6d50cSHong Zhang #else 1958c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 196a1b6d50cSHong Zhang #endif 19758a912c5SAmlan Barua #else 198a31b9140SHong Zhang fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 19958a912c5SAmlan Barua #endif 200b2b77a04SHong Zhang break; 201b2b77a04SHong Zhang } 202f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 203b2b77a04SHong Zhang fftw->boutarray = y_array; 204b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 205b2b77a04SHong Zhang } else { /* use existing plan */ 206b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 2077e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX) 208b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 2097e4bc134SDominic Meiser #else 2107e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 2117e4bc134SDominic Meiser #endif 212b2b77a04SHong Zhang } else { 213b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 214b2b77a04SHong Zhang } 215b2b77a04SHong Zhang } 216b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 217f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 218b2b77a04SHong Zhang PetscFunctionReturn(0); 219b2b77a04SHong Zhang } 220b2b77a04SHong Zhang 22197504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel 22297504071SAmlan Barua Input parameter: 2233564f093SHong Zhang A - the matrix 22497504071SAmlan Barua x - the vector on which FDFT will be performed 22597504071SAmlan Barua 22697504071SAmlan Barua Output parameter: 22797504071SAmlan Barua y - vector that stores result of FDFT 22897504071SAmlan Barua */ 229b2b77a04SHong Zhang #undef __FUNCT__ 230b2b77a04SHong Zhang #define __FUNCT__ "MatMult_MPIFFTW" 231b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 232b2b77a04SHong Zhang { 233b2b77a04SHong Zhang PetscErrorCode ierr; 234b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 235b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 236f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 237f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 238c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 239ce94432eSBarry Smith MPI_Comm comm; 240b2b77a04SHong Zhang 241b2b77a04SHong Zhang PetscFunctionBegin; 242ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 243f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 244b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 245b2b77a04SHong Zhang if (!fftw->p_forward) { /* create a plan, then excute it */ 246b2b77a04SHong Zhang switch (ndim) { 247b2b77a04SHong Zhang case 1: 2483c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 249b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 250ae0a50aaSHong Zhang #else 2514f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 2523c3a9c75SAmlan Barua #endif 253b2b77a04SHong Zhang break; 254b2b77a04SHong Zhang case 2: 25597504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2573c3a9c75SAmlan Barua #else 2583c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2593c3a9c75SAmlan Barua #endif 260b2b77a04SHong Zhang break; 261b2b77a04SHong Zhang case 3: 2623c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 263b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2643c3a9c75SAmlan Barua #else 26551d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2663c3a9c75SAmlan Barua #endif 267b2b77a04SHong Zhang break; 268b2b77a04SHong Zhang default: 2693c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 270c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 2713c3a9c75SAmlan Barua #else 2723c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 2733c3a9c75SAmlan Barua #endif 274b2b77a04SHong Zhang break; 275b2b77a04SHong Zhang } 276f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *) x_array; 277b2b77a04SHong Zhang fftw->foutarray = y_array; 278b2b77a04SHong Zhang /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279b2b77a04SHong Zhang planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 281b2b77a04SHong Zhang } else { /* use existing plan */ 282b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 284b2b77a04SHong Zhang } else { 285b2b77a04SHong Zhang fftw_execute(fftw->p_forward); 286b2b77a04SHong Zhang } 287b2b77a04SHong Zhang } 288b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 289f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 290b2b77a04SHong Zhang PetscFunctionReturn(0); 291b2b77a04SHong Zhang } 292b2b77a04SHong Zhang 29397504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT 29497504071SAmlan Barua Input parameter: 2953564f093SHong Zhang A - the matrix 29697504071SAmlan Barua x - the vector on which BDFT will be performed 29797504071SAmlan Barua 29897504071SAmlan Barua Output parameter: 29997504071SAmlan Barua y - vector that stores result of BDFT 30097504071SAmlan Barua */ 301b2b77a04SHong Zhang #undef __FUNCT__ 302b2b77a04SHong Zhang #define __FUNCT__ "MatMultTranspose_MPIFFTW" 303b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 304b2b77a04SHong Zhang { 305b2b77a04SHong Zhang PetscErrorCode ierr; 306b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 307b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 308f4fca6d4SMatthew G. Knepley const PetscScalar *x_array; 309f4fca6d4SMatthew G. Knepley PetscScalar *y_array; 310c92418ccSAmlan Barua PetscInt ndim=fft->ndim,*dim=fft->dim; 311ce94432eSBarry Smith MPI_Comm comm; 312b2b77a04SHong Zhang 313b2b77a04SHong Zhang PetscFunctionBegin; 314ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 315f4fca6d4SMatthew G. Knepley ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 316b2b77a04SHong Zhang ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 317b2b77a04SHong Zhang if (!fftw->p_backward) { /* create a plan, then excute it */ 318b2b77a04SHong Zhang switch (ndim) { 319b2b77a04SHong Zhang case 1: 3203c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 321b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 322ae0a50aaSHong Zhang #else 3234f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 3243c3a9c75SAmlan Barua #endif 325b2b77a04SHong Zhang break; 326b2b77a04SHong Zhang case 2: 32797504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 328b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3293c3a9c75SAmlan Barua #else 3303c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3313c3a9c75SAmlan Barua #endif 332b2b77a04SHong Zhang break; 333b2b77a04SHong Zhang case 3: 3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 335b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3363c3a9c75SAmlan Barua #else 3373c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3383c3a9c75SAmlan Barua #endif 339b2b77a04SHong Zhang break; 340b2b77a04SHong Zhang default: 3413c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX) 342c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 3433c3a9c75SAmlan Barua #else 3443c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 3453c3a9c75SAmlan Barua #endif 346b2b77a04SHong Zhang break; 347b2b77a04SHong Zhang } 348f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *) x_array; 349b2b77a04SHong Zhang fftw->boutarray = y_array; 350b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 351b2b77a04SHong Zhang } else { /* use existing plan */ 352b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 353b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 354b2b77a04SHong Zhang } else { 355b2b77a04SHong Zhang fftw_execute(fftw->p_backward);CHKERRQ(ierr); 356b2b77a04SHong Zhang } 357b2b77a04SHong Zhang } 358b2b77a04SHong Zhang ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 359f4fca6d4SMatthew G. Knepley ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 360b2b77a04SHong Zhang PetscFunctionReturn(0); 361b2b77a04SHong Zhang } 362b2b77a04SHong Zhang 363b2b77a04SHong Zhang #undef __FUNCT__ 364b2b77a04SHong Zhang #define __FUNCT__ "MatDestroy_FFTW" 365b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A) 366b2b77a04SHong Zhang { 367b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT*)A->data; 368b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 369b2b77a04SHong Zhang PetscErrorCode ierr; 370b2b77a04SHong Zhang 371b2b77a04SHong Zhang PetscFunctionBegin; 372b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward); 373b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward); 3748c1d5ab3SHong Zhang if (fftw->iodims) { 3758c1d5ab3SHong Zhang free(fftw->iodims); 3768c1d5ab3SHong Zhang } 377bb5bf6f6SHong Zhang ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 378bf0cc555SLisandro Dalcin ierr = PetscFree(fft->data);CHKERRQ(ierr); 3796ccf0b3eSHong Zhang fftw_mpi_cleanup(); 380b2b77a04SHong Zhang PetscFunctionReturn(0); 381b2b77a04SHong Zhang } 382b2b77a04SHong Zhang 383c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384b2b77a04SHong Zhang #undef __FUNCT__ 385b2b77a04SHong Zhang #define __FUNCT__ "VecDestroy_MPIFFTW" 386b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v) 387b2b77a04SHong Zhang { 388b2b77a04SHong Zhang PetscErrorCode ierr; 389b2b77a04SHong Zhang PetscScalar *array; 390b2b77a04SHong Zhang 391b2b77a04SHong Zhang PetscFunctionBegin; 392b2b77a04SHong Zhang ierr = VecGetArray(v,&array);CHKERRQ(ierr); 393b2b77a04SHong Zhang fftw_free((fftw_complex*)array);CHKERRQ(ierr); 394b2b77a04SHong Zhang ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 395b2b77a04SHong Zhang ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 396b2b77a04SHong Zhang PetscFunctionReturn(0); 397b2b77a04SHong Zhang } 398b2b77a04SHong Zhang 3994f7415efSAmlan Barua #undef __FUNCT__ 4002a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW" 4014be45526SHong Zhang /*@ 4022a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 4034f7415efSAmlan Barua parallel layout determined by FFTW 4044f7415efSAmlan Barua 4054f7415efSAmlan Barua Collective on Mat 4064f7415efSAmlan Barua 4074f7415efSAmlan Barua Input Parameter: 4083564f093SHong Zhang . A - the matrix 4094f7415efSAmlan Barua 4104f7415efSAmlan Barua Output Parameter: 411cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW 412cc55f3b1SHong Zhang - y - (optional) output vector of forward FFTW 413cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW 4144f7415efSAmlan Barua 4154f7415efSAmlan Barua Level: advanced 4163564f093SHong Zhang 41797504071SAmlan Barua Note: The parallel layout of output of forward FFTW is always same as the input 41897504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward 41997504071SAmlan Barua FFTW might not be same as the output of backward FFTW. 42097504071SAmlan Barua Also note that we need to provide enough space while doing parallel real transform. 42197504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to 42297504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 423e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 424e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two 425e0ec536eSAmlan Barua zeros if it is odd only one zero is needed. 426e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local 427e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for 428e0ec536eSAmlan Barua each processor and returns that. 4294f7415efSAmlan Barua 4304f7415efSAmlan Barua .seealso: MatCreateFFTW() 4314be45526SHong Zhang @*/ 4322a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 4334be45526SHong Zhang { 4344be45526SHong Zhang PetscErrorCode ierr; 435b4c3921fSHong Zhang 4364be45526SHong Zhang PetscFunctionBegin; 437*163d334eSBarry Smith ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 4384be45526SHong Zhang PetscFunctionReturn(0); 4394be45526SHong Zhang } 4404be45526SHong Zhang 4414be45526SHong Zhang #undef __FUNCT__ 4422a7a6963SBarry Smith #define __FUNCT__ "MatCreateVecsFFTW_FFTW" 4432a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 4444f7415efSAmlan Barua { 4454f7415efSAmlan Barua PetscErrorCode ierr; 4464f7415efSAmlan Barua PetscMPIInt size,rank; 447ce94432eSBarry Smith MPI_Comm comm; 4484f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 4494f7415efSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 4509496c9aeSAmlan Barua PetscInt N = fft->N; 4514f7415efSAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 4524f7415efSAmlan Barua 4534f7415efSAmlan Barua PetscFunctionBegin; 4544f7415efSAmlan Barua PetscValidHeaderSpecific(A,MAT_CLASSID,1); 4554f7415efSAmlan Barua PetscValidType(A,1); 456ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4574f7415efSAmlan Barua 4584f8276c3SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 4594f8276c3SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 4604f7415efSAmlan Barua if (size == 1) { /* sequential case */ 4614f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4624f7415efSAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 4634f7415efSAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 4644f7415efSAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 4654f7415efSAmlan Barua #else 4668ca4c034SAmlan Barua if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 4678ca4c034SAmlan Barua if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 4688ca4c034SAmlan Barua if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 4694f7415efSAmlan Barua #endif 47097504071SAmlan Barua } else { /* parallel cases */ 4714f7415efSAmlan Barua ptrdiff_t alloc_local,local_n0,local_0_start; 4729496c9aeSAmlan Barua ptrdiff_t local_n1; 4739496c9aeSAmlan Barua fftw_complex *data_fout; 4749496c9aeSAmlan Barua ptrdiff_t local_1_start; 4759496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX) 4769496c9aeSAmlan Barua fftw_complex *data_fin,*data_bout; 4779496c9aeSAmlan Barua #else 4784f7415efSAmlan Barua double *data_finr,*data_boutr; 4796ccf0b3eSHong Zhang PetscInt n1,N1; 4809496c9aeSAmlan Barua ptrdiff_t temp; 4819496c9aeSAmlan Barua #endif 4829496c9aeSAmlan Barua 4834f7415efSAmlan Barua switch (ndim) { 4844f7415efSAmlan Barua case 1: 48557625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 4864f8276c3SHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 48757625b84SAmlan Barua #else 48857625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 48957625b84SAmlan Barua if (fin) { 49057625b84SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 491778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 49226fbe8dcSKarl Rupp 49357625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 49457625b84SAmlan Barua } 49557625b84SAmlan Barua if (fout) { 49657625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 497778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 49826fbe8dcSKarl Rupp 49957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 50057625b84SAmlan Barua } 50157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 50257625b84SAmlan Barua if (bout) { 50357625b84SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 504778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 50526fbe8dcSKarl Rupp 50657625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 50757625b84SAmlan Barua } 50857625b84SAmlan Barua break; 50957625b84SAmlan Barua #endif 5104f7415efSAmlan Barua case 2: 51197504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 5124f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5134f7415efSAmlan Barua N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 5144f7415efSAmlan Barua if (fin) { 5154f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 516778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 51726fbe8dcSKarl Rupp 5184f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5194f7415efSAmlan Barua } 5204f7415efSAmlan Barua if (bout) { 5214f7415efSAmlan Barua data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 522778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 52326fbe8dcSKarl Rupp 5244f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5254f7415efSAmlan Barua } 52657625b84SAmlan Barua if (fout) { 52757625b84SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 52926fbe8dcSKarl Rupp 53057625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 53157625b84SAmlan Barua } 5324f7415efSAmlan Barua #else 5334f7415efSAmlan Barua /* Get local size */ 5344f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 5354f7415efSAmlan Barua if (fin) { 5364f7415efSAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 53826fbe8dcSKarl Rupp 5394f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5404f7415efSAmlan Barua } 5414f7415efSAmlan Barua if (fout) { 5424f7415efSAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 54426fbe8dcSKarl Rupp 5454f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5464f7415efSAmlan Barua } 5474f7415efSAmlan Barua if (bout) { 5484f7415efSAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 549778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 55026fbe8dcSKarl Rupp 5514f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5524f7415efSAmlan Barua } 5534f7415efSAmlan Barua #endif 5544f7415efSAmlan Barua break; 5554f7415efSAmlan Barua case 3: 5564f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 5574f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 5584f7415efSAmlan Barua N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 5594f7415efSAmlan Barua if (fin) { 5604f7415efSAmlan Barua data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 561778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 56226fbe8dcSKarl Rupp 5634f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5644f7415efSAmlan Barua } 5654f7415efSAmlan Barua if (bout) { 5664f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 567778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 56826fbe8dcSKarl Rupp 5694f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5704f7415efSAmlan Barua } 5713564f093SHong Zhang 57257625b84SAmlan Barua if (fout) { 57357625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 57526fbe8dcSKarl Rupp 57657625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 57757625b84SAmlan Barua } 5784f7415efSAmlan Barua #else 5790c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 5800c9b18e4SAmlan Barua if (fin) { 5810c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 58326fbe8dcSKarl Rupp 5840c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 5850c9b18e4SAmlan Barua } 5860c9b18e4SAmlan Barua if (fout) { 5870c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 588778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 58926fbe8dcSKarl Rupp 5900c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 5910c9b18e4SAmlan Barua } 5920c9b18e4SAmlan Barua if (bout) { 5930c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 594778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 59526fbe8dcSKarl Rupp 5960c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 5970c9b18e4SAmlan Barua } 5984f7415efSAmlan Barua #endif 5994f7415efSAmlan Barua break; 6004f7415efSAmlan Barua default: 6014f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 6024f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 60326fbe8dcSKarl Rupp 6044f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 60526fbe8dcSKarl Rupp 6064f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 6074f7415efSAmlan Barua N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 60826fbe8dcSKarl Rupp 6094f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 6104f7415efSAmlan Barua 6114f7415efSAmlan Barua if (fin) { 6124f7415efSAmlan Barua data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 613778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 61426fbe8dcSKarl Rupp 6154f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6164f7415efSAmlan Barua } 6174f7415efSAmlan Barua if (bout) { 6184f7415efSAmlan Barua data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 619778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 62026fbe8dcSKarl Rupp 6219496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6224f7415efSAmlan Barua } 6233564f093SHong Zhang 62457625b84SAmlan Barua if (fout) { 62557625b84SAmlan Barua data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 626778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 62726fbe8dcSKarl Rupp 62857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 62957625b84SAmlan Barua } 6304f7415efSAmlan Barua #else 6310c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 6320c9b18e4SAmlan Barua if (fin) { 6330c9b18e4SAmlan Barua data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 63526fbe8dcSKarl Rupp 6360c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW; 6370c9b18e4SAmlan Barua } 6380c9b18e4SAmlan Barua if (fout) { 6390c9b18e4SAmlan Barua data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 640778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 64126fbe8dcSKarl Rupp 6420c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW; 6430c9b18e4SAmlan Barua } 6440c9b18e4SAmlan Barua if (bout) { 6450c9b18e4SAmlan Barua data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 646778a2246SBarry Smith ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 64726fbe8dcSKarl Rupp 6480c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW; 6490c9b18e4SAmlan Barua } 6504f7415efSAmlan Barua #endif 6514f7415efSAmlan Barua break; 6524f7415efSAmlan Barua } 6534f7415efSAmlan Barua } 6544f7415efSAmlan Barua PetscFunctionReturn(0); 6554f7415efSAmlan Barua } 6564f7415efSAmlan Barua 657c92418ccSAmlan Barua #undef __FUNCT__ 65874a26884SAmlan Barua #define __FUNCT__ "VecScatterPetscToFFTW" 6593564f093SHong Zhang /*@ 6603564f093SHong Zhang VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 66154efbe56SHong Zhang 6623564f093SHong Zhang Collective on Mat 6633564f093SHong Zhang 6643564f093SHong Zhang Input Parameters: 6653564f093SHong Zhang + A - FFTW matrix 6663564f093SHong Zhang - x - the PETSc vector 6673564f093SHong Zhang 6683564f093SHong Zhang Output Parameters: 6693564f093SHong Zhang . y - the FFTW vector 6703564f093SHong Zhang 671b2b77a04SHong Zhang Options Database Keys: 6723564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags 673b2b77a04SHong Zhang 674b2b77a04SHong Zhang Level: intermediate 6753564f093SHong Zhang 67697504071SAmlan Barua Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 67797504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 67897504071SAmlan Barua zeros. This routine does that job by scattering operation. 67997504071SAmlan Barua 6803564f093SHong Zhang .seealso: VecScatterFFTWToPetsc() 6813564f093SHong Zhang @*/ 6823564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 6833564f093SHong Zhang { 6843564f093SHong Zhang PetscErrorCode ierr; 685b2b77a04SHong Zhang 6863564f093SHong Zhang PetscFunctionBegin; 687*163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 6883564f093SHong Zhang PetscFunctionReturn(0); 6893564f093SHong Zhang } 6903c3a9c75SAmlan Barua 6913c3a9c75SAmlan Barua #undef __FUNCT__ 6921986ecc6SHong Zhang #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 69374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 6943c3a9c75SAmlan Barua { 6953c3a9c75SAmlan Barua PetscErrorCode ierr; 696ce94432eSBarry Smith MPI_Comm comm; 6973c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 6983c3a9c75SAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 6999496c9aeSAmlan Barua PetscInt N =fft->N; 700b5d11533SAmlan Barua PetscInt ndim =fft->ndim,*dim=fft->dim; 7019496c9aeSAmlan Barua PetscInt low; 7027a21806cSHong Zhang PetscMPIInt rank,size; 7037a21806cSHong Zhang PetscInt vsize,vsize1; 7047a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 7059496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 7063c3a9c75SAmlan Barua VecScatter vecscat; 7073c3a9c75SAmlan Barua IS list1,list2; 7089496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 7099496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 7109496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 711a31b9140SHong Zhang PetscInt NM; 7129496c9aeSAmlan Barua ptrdiff_t temp; 7139496c9aeSAmlan Barua #endif 7143c3a9c75SAmlan Barua 7153564f093SHong Zhang PetscFunctionBegin; 716ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 717f76f76beSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 718f76f76beSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 7190298fd71SBarry Smith ierr = VecGetOwnershipRange(y,&low,NULL); 7203c3a9c75SAmlan Barua 7213564f093SHong Zhang if (size==1) { 7228ca4c034SAmlan Barua ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 7238ca4c034SAmlan Barua ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 7249496c9aeSAmlan Barua ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 7256971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 7266971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7276971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7286971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 729b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 7303564f093SHong Zhang } else { 7313c3a9c75SAmlan Barua switch (ndim) { 7323c3a9c75SAmlan Barua case 1: 73364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7347a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 73526fbe8dcSKarl Rupp 7364f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 7374f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0,low,1,&list2); 73864657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 73964657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74064657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 74164657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 74264657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 74364657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 74464657d84SAmlan Barua #else 745e5d7f247SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 74664657d84SAmlan Barua #endif 7473c3a9c75SAmlan Barua break; 7483c3a9c75SAmlan Barua case 2: 749bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7507a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 75126fbe8dcSKarl Rupp 7524f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 7534f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 754bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 755bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 758bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 759bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 760bd59e6c6SAmlan Barua #else 761c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 76226fbe8dcSKarl Rupp 763854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 764854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 7653c3a9c75SAmlan Barua 7663564f093SHong Zhang if (dim[1]%2==0) { 7673c3a9c75SAmlan Barua NM = dim[1]+2; 7683564f093SHong Zhang } else { 7693c3a9c75SAmlan Barua NM = dim[1]+1; 7703564f093SHong Zhang } 7713c3a9c75SAmlan Barua for (i=0; i<local_n0; i++) { 7723c3a9c75SAmlan Barua for (j=0; j<dim[1]; j++) { 7733c3a9c75SAmlan Barua tempindx = i*dim[1] + j; 7743c3a9c75SAmlan Barua tempindx1 = i*NM + j; 77526fbe8dcSKarl Rupp 7765b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 7773c3a9c75SAmlan Barua indx2[tempindx]=low+tempindx1; 7783c3a9c75SAmlan Barua } 7793c3a9c75SAmlan Barua } 7803c3a9c75SAmlan Barua 7813c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 7823c3a9c75SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 7833c3a9c75SAmlan Barua 784f76f76beSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 785f76f76beSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 786f76f76beSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787f76f76beSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 788b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 789b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 790b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 791b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 792bd59e6c6SAmlan Barua #endif 7939496c9aeSAmlan Barua break; 7943c3a9c75SAmlan Barua 7953c3a9c75SAmlan Barua case 3: 796bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 7977a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 79826fbe8dcSKarl Rupp 7994f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 8004f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 801bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 802bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 805bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 806bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 807bd59e6c6SAmlan Barua #else 808f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 809f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 8107a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 81126fbe8dcSKarl Rupp 812854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 813854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 81451d1eed7SAmlan Barua 815db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 816db4deed7SKarl Rupp else NM = dim[2]+1; 81751d1eed7SAmlan Barua 81851d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 81951d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 82051d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 82151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 82251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 82326fbe8dcSKarl Rupp 82451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 82551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 82651d1eed7SAmlan Barua } 82751d1eed7SAmlan Barua } 82851d1eed7SAmlan Barua } 82951d1eed7SAmlan Barua 83051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 83151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 83251d1eed7SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 83351d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83451d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 83551d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 836b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 837b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 838b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 839b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 840bd59e6c6SAmlan Barua #endif 8419496c9aeSAmlan Barua break; 8423c3a9c75SAmlan Barua 8433c3a9c75SAmlan Barua default: 844bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 8457a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 84626fbe8dcSKarl Rupp 8474f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 8484f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 849bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 850bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 853bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 854bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 855bd59e6c6SAmlan Barua #else 856f1ade23cSHong Zhang /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 857f1ade23cSHong Zhang SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 858e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 85926fbe8dcSKarl Rupp 860e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 86126fbe8dcSKarl Rupp 8627a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 86326fbe8dcSKarl Rupp 864e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 865e5d7f247SAmlan Barua 866e5d7f247SAmlan Barua partial_dim = fftw->partial_dim; 867e5d7f247SAmlan Barua 868854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 869854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 870e5d7f247SAmlan Barua 871db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 872db4deed7SKarl Rupp else NM = 1; 873e5d7f247SAmlan Barua 8746971673cSAmlan Barua j = low; 8753564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 8766971673cSAmlan Barua indx1[i] = local_0_start*partial_dim + i; 8776971673cSAmlan Barua indx2[i] = j; 87826fbe8dcSKarl Rupp if (k%dim[ndim-1]==0) j+=NM; 8796971673cSAmlan Barua j++; 8806971673cSAmlan Barua } 8816971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 8826971673cSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 8836971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 8846971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8856971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8866971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 887b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 888b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 889b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 890b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 891bd59e6c6SAmlan Barua #endif 8929496c9aeSAmlan Barua break; 8933c3a9c75SAmlan Barua } 894e81bb599SAmlan Barua } 8953564f093SHong Zhang PetscFunctionReturn(0); 8963c3a9c75SAmlan Barua } 8973c3a9c75SAmlan Barua 8983c3a9c75SAmlan Barua #undef __FUNCT__ 89974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc" 9003564f093SHong Zhang /*@ 9013564f093SHong Zhang VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 9023564f093SHong Zhang 9033564f093SHong Zhang Collective on Mat 9043564f093SHong Zhang 9053564f093SHong Zhang Input Parameters: 9063564f093SHong Zhang + A - FFTW matrix 9073564f093SHong Zhang - x - FFTW vector 9083564f093SHong Zhang 9093564f093SHong Zhang Output Parameters: 9103564f093SHong Zhang . y - PETSc vector 9113564f093SHong Zhang 9123564f093SHong Zhang Level: intermediate 9133564f093SHong Zhang 9143564f093SHong Zhang Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 9153564f093SHong Zhang VecScatterFFTWToPetsc removes those extra zeros. 9163564f093SHong Zhang 9173564f093SHong Zhang .seealso: VecScatterPetscToFFTW() 9183564f093SHong Zhang @*/ 91974a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 9203c3a9c75SAmlan Barua { 9213c3a9c75SAmlan Barua PetscErrorCode ierr; 9225fd66863SKarl Rupp 9233c3a9c75SAmlan Barua PetscFunctionBegin; 924*163d334eSBarry Smith ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 9253c3a9c75SAmlan Barua PetscFunctionReturn(0); 9263c3a9c75SAmlan Barua } 92754efbe56SHong Zhang 9283c3a9c75SAmlan Barua #undef __FUNCT__ 92974a26884SAmlan Barua #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 93074a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 9315b3e41ffSAmlan Barua { 9325b3e41ffSAmlan Barua PetscErrorCode ierr; 933ce94432eSBarry Smith MPI_Comm comm; 9345b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT*)A->data; 9355b3e41ffSAmlan Barua Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 9369496c9aeSAmlan Barua PetscInt N = fft->N; 937b3655f67SAmlan Barua PetscInt ndim = fft->ndim,*dim=fft->dim; 9389496c9aeSAmlan Barua PetscInt low; 9397a21806cSHong Zhang PetscMPIInt rank,size; 9407a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 9419496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 9425b3e41ffSAmlan Barua VecScatter vecscat; 9435b3e41ffSAmlan Barua IS list1,list2; 9449496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 9459496c9aeSAmlan Barua PetscInt i,j,k,partial_dim; 9469496c9aeSAmlan Barua PetscInt *indx1, *indx2, tempindx, tempindx1; 947a31b9140SHong Zhang PetscInt NM; 9489496c9aeSAmlan Barua ptrdiff_t temp; 9499496c9aeSAmlan Barua #endif 9509496c9aeSAmlan Barua 9513564f093SHong Zhang PetscFunctionBegin; 952ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 9535b3e41ffSAmlan Barua ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 9545b3e41ffSAmlan Barua ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 9550298fd71SBarry Smith ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 9565b3e41ffSAmlan Barua 957e81bb599SAmlan Barua if (size==1) { 958b3655f67SAmlan Barua ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 9596971673cSAmlan Barua ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 9606971673cSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9616971673cSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9626971673cSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 963b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 964e81bb599SAmlan Barua 9653564f093SHong Zhang } else { 966e81bb599SAmlan Barua switch (ndim) { 967e81bb599SAmlan Barua case 1: 96864657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9697a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 97026fbe8dcSKarl Rupp 9714f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 9724f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n1,low,1,&list2); 97364657d84SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 97464657d84SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97564657d84SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 97664657d84SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 97764657d84SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 97864657d84SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 97964657d84SAmlan Barua #else 9806a506ed8SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 98164657d84SAmlan Barua #endif 9825b3e41ffSAmlan Barua break; 9835b3e41ffSAmlan Barua case 2: 984bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 9857a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 98626fbe8dcSKarl Rupp 9874f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 9884f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 989bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 990bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 994bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 995bd59e6c6SAmlan Barua #else 9967a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 99726fbe8dcSKarl Rupp 998854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 999854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 10005b3e41ffSAmlan Barua 1001db4deed7SKarl Rupp if (dim[1]%2==0) NM = dim[1]+2; 1002db4deed7SKarl Rupp else NM = dim[1]+1; 10035b3e41ffSAmlan Barua 10045b3e41ffSAmlan Barua for (i=0; i<local_n0; i++) { 10055b3e41ffSAmlan Barua for (j=0; j<dim[1]; j++) { 10065b3e41ffSAmlan Barua tempindx = i*dim[1] + j; 10075b3e41ffSAmlan Barua tempindx1 = i*NM + j; 100826fbe8dcSKarl Rupp 10095b3e41ffSAmlan Barua indx1[tempindx]=local_0_start*dim[1]+tempindx; 10105b3e41ffSAmlan Barua indx2[tempindx]=low+tempindx1; 10115b3e41ffSAmlan Barua } 10125b3e41ffSAmlan Barua } 10135b3e41ffSAmlan Barua 10145b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 10155b3e41ffSAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 10165b3e41ffSAmlan Barua 10175b3e41ffSAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 10185b3e41ffSAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10195b3e41ffSAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 10205b3e41ffSAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1021b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1022b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1023b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1024b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1025bd59e6c6SAmlan Barua #endif 10269496c9aeSAmlan Barua break; 10275b3e41ffSAmlan Barua case 3: 1028bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10297a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 103026fbe8dcSKarl Rupp 10314f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 10324f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1033bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1034bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1037bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1038bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1039bd59e6c6SAmlan Barua #else 10407a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 104126fbe8dcSKarl Rupp 1042854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1043854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 104451d1eed7SAmlan Barua 1045db4deed7SKarl Rupp if (dim[2]%2==0) NM = dim[2]+2; 1046db4deed7SKarl Rupp else NM = dim[2]+1; 104751d1eed7SAmlan Barua 104851d1eed7SAmlan Barua for (i=0; i<local_n0; i++) { 104951d1eed7SAmlan Barua for (j=0; j<dim[1]; j++) { 105051d1eed7SAmlan Barua for (k=0; k<dim[2]; k++) { 105151d1eed7SAmlan Barua tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 105251d1eed7SAmlan Barua tempindx1 = i*dim[1]*NM + j*NM + k; 105326fbe8dcSKarl Rupp 105451d1eed7SAmlan Barua indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 105551d1eed7SAmlan Barua indx2[tempindx]=low+tempindx1; 105651d1eed7SAmlan Barua } 105751d1eed7SAmlan Barua } 105851d1eed7SAmlan Barua } 105951d1eed7SAmlan Barua 106051d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 106151d1eed7SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 106251d1eed7SAmlan Barua 106351d1eed7SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 106451d1eed7SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106551d1eed7SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 106651d1eed7SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1067b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1068b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1069b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1070b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1071bd59e6c6SAmlan Barua #endif 10729496c9aeSAmlan Barua break; 10735b3e41ffSAmlan Barua default: 1074bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX) 10757a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 107626fbe8dcSKarl Rupp 10774f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 10784f8276c3SHong Zhang ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1079bd59e6c6SAmlan Barua ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1080bd59e6c6SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081bd59e6c6SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1082bd59e6c6SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1083bd59e6c6SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1084bd59e6c6SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1085bd59e6c6SAmlan Barua #else 1086ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 108726fbe8dcSKarl Rupp 1088ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 108926fbe8dcSKarl Rupp 1090c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 109126fbe8dcSKarl Rupp 1092ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1093ba6e06d0SAmlan Barua 1094ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim; 1095ba6e06d0SAmlan Barua 1096854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1097854ce69bSBarry Smith ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1098ba6e06d0SAmlan Barua 1099db4deed7SKarl Rupp if (dim[ndim-1]%2==0) NM = 2; 1100db4deed7SKarl Rupp else NM = 1; 1101ba6e06d0SAmlan Barua 1102ba6e06d0SAmlan Barua j = low; 11033564f093SHong Zhang for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1104ba6e06d0SAmlan Barua indx1[i] = local_0_start*partial_dim + i; 1105ba6e06d0SAmlan Barua indx2[i] = j; 11063564f093SHong Zhang if (k%dim[ndim-1]==0) j+=NM; 1107ba6e06d0SAmlan Barua j++; 1108ba6e06d0SAmlan Barua } 1109ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1110ba6e06d0SAmlan Barua ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1111ba6e06d0SAmlan Barua 1112ba6e06d0SAmlan Barua ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1113ba6e06d0SAmlan Barua ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114ba6e06d0SAmlan Barua ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115ba6e06d0SAmlan Barua ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1116b223cc97SAmlan Barua ierr = ISDestroy(&list1);CHKERRQ(ierr); 1117b223cc97SAmlan Barua ierr = ISDestroy(&list2);CHKERRQ(ierr); 1118b223cc97SAmlan Barua ierr = PetscFree(indx1);CHKERRQ(ierr); 1119b223cc97SAmlan Barua ierr = PetscFree(indx2);CHKERRQ(ierr); 1120bd59e6c6SAmlan Barua #endif 11219496c9aeSAmlan Barua break; 11225b3e41ffSAmlan Barua } 1123e81bb599SAmlan Barua } 11243564f093SHong Zhang PetscFunctionReturn(0); 11255b3e41ffSAmlan Barua } 11265b3e41ffSAmlan Barua 11275b3e41ffSAmlan Barua #undef __FUNCT__ 11283c3a9c75SAmlan Barua #define __FUNCT__ "MatCreate_FFTW" 11293c3a9c75SAmlan Barua /* 11303564f093SHong Zhang MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 11313564f093SHong Zhang 11323c3a9c75SAmlan Barua Options Database Keys: 11333c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags 11343c3a9c75SAmlan Barua 11353c3a9c75SAmlan Barua Level: intermediate 11363c3a9c75SAmlan Barua 11373c3a9c75SAmlan Barua */ 11388cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1139b2b77a04SHong Zhang { 1140b2b77a04SHong Zhang PetscErrorCode ierr; 1141ce94432eSBarry Smith MPI_Comm comm; 1142b2b77a04SHong Zhang Mat_FFT *fft=(Mat_FFT*)A->data; 1143b2b77a04SHong Zhang Mat_FFTW *fftw; 1144b2b77a04SHong Zhang PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 11455274ed1bSHong Zhang const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 11465274ed1bSHong Zhang unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1147b2b77a04SHong Zhang PetscBool flg; 11484f48bc67SAmlan Barua PetscInt p_flag,partial_dim=1,ctr; 114911902ff2SHong Zhang PetscMPIInt size,rank; 11509496c9aeSAmlan Barua ptrdiff_t *pdim; 11519496c9aeSAmlan Barua ptrdiff_t local_n1,local_1_start; 11529496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11539496c9aeSAmlan Barua ptrdiff_t temp; 11548ca4c034SAmlan Barua PetscInt N1,tot_dim; 11554f48bc67SAmlan Barua #else 11564f48bc67SAmlan Barua PetscInt n1; 11579496c9aeSAmlan Barua #endif 11589496c9aeSAmlan Barua 1159b2b77a04SHong Zhang PetscFunctionBegin; 1160ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1161b2b77a04SHong Zhang ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 116211902ff2SHong Zhang ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1163c92418ccSAmlan Barua 11641878d478SAmlan Barua fftw_mpi_init(); 116511902ff2SHong Zhang pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 116611902ff2SHong Zhang pdim[0] = dim[0]; 11678ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11688ca4c034SAmlan Barua tot_dim = 2*dim[0]; 11698ca4c034SAmlan Barua #endif 11703564f093SHong Zhang for (ctr=1; ctr<ndim; ctr++) { 11716882372aSHong Zhang partial_dim *= dim[ctr]; 117211902ff2SHong Zhang pdim[ctr] = dim[ctr]; 11738ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 1174db4deed7SKarl Rupp if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1175db4deed7SKarl Rupp else tot_dim *= dim[ctr]; 11768ca4c034SAmlan Barua #endif 11776882372aSHong Zhang } 11783c3a9c75SAmlan Barua 1179b2b77a04SHong Zhang if (size == 1) { 1180e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX) 1181b2b77a04SHong Zhang ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1182b2b77a04SHong Zhang n = N; 1183e81bb599SAmlan Barua #else 1184e81bb599SAmlan Barua ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1185e81bb599SAmlan Barua n = tot_dim; 1186e81bb599SAmlan Barua #endif 1187e81bb599SAmlan Barua 1188b2b77a04SHong Zhang } else { 11897a21806cSHong Zhang ptrdiff_t local_n0,local_0_start; 1190b2b77a04SHong Zhang switch (ndim) { 1191b2b77a04SHong Zhang case 1: 11923c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX) 11933c3a9c75SAmlan Barua SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1194e5d7f247SAmlan Barua #else 11957a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 119626fbe8dcSKarl Rupp 11976882372aSHong Zhang n = (PetscInt)local_n0; 11989496c9aeSAmlan Barua n1 = (PetscInt)local_n1; 11999496c9aeSAmlan Barua ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1200e5d7f247SAmlan Barua #endif 1201b2b77a04SHong Zhang break; 1202b2b77a04SHong Zhang case 2: 12035b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX) 12047a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1205b2b77a04SHong Zhang /* 1206b2b77a04SHong Zhang PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 12070ec8b6e3SBarry Smith PetscSynchronizedFlush(comm,PETSC_STDOUT); 1208b2b77a04SHong Zhang */ 1209b2b77a04SHong Zhang n = (PetscInt)local_n0*dim[1]; 1210b2b77a04SHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 12115b3e41ffSAmlan Barua #else 1212c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 121326fbe8dcSKarl Rupp 12145b3e41ffSAmlan Barua n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1215c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 12165b3e41ffSAmlan Barua #endif 1217b2b77a04SHong Zhang break; 1218b2b77a04SHong Zhang case 3: 121951d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12207a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 122126fbe8dcSKarl Rupp 12226882372aSHong Zhang n = (PetscInt)local_n0*dim[1]*dim[2]; 12236882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 122451d1eed7SAmlan Barua #else 1225c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 122626fbe8dcSKarl Rupp 122751d1eed7SAmlan Barua n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1228c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 122951d1eed7SAmlan Barua #endif 1230b2b77a04SHong Zhang break; 1231b2b77a04SHong Zhang default: 1232b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX) 12337a21806cSHong Zhang fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 123426fbe8dcSKarl Rupp 12356882372aSHong Zhang n = (PetscInt)local_n0*partial_dim; 12366882372aSHong Zhang ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1237b3a17365SAmlan Barua #else 1238b3a17365SAmlan Barua temp = pdim[ndim-1]; 123926fbe8dcSKarl Rupp 1240b3a17365SAmlan Barua pdim[ndim-1] = temp/2 + 1; 124126fbe8dcSKarl Rupp 1242c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 124326fbe8dcSKarl Rupp 1244b3a17365SAmlan Barua n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1245b3a17365SAmlan Barua N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 124626fbe8dcSKarl Rupp 1247b3a17365SAmlan Barua pdim[ndim-1] = temp; 124826fbe8dcSKarl Rupp 1249c8a8a4f0SAmlan Barua ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1250b3a17365SAmlan Barua #endif 1251b2b77a04SHong Zhang break; 1252b2b77a04SHong Zhang } 1253b2b77a04SHong Zhang } 1254b2b77a04SHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1255b00a9115SJed Brown ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1256b2b77a04SHong Zhang fft->data = (void*)fftw; 1257b2b77a04SHong Zhang 1258b2b77a04SHong Zhang fft->n = n; 12590c74a584SJed Brown fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1260e5d7f247SAmlan Barua fftw->partial_dim = partial_dim; 126126fbe8dcSKarl Rupp 12625e806cb5SMatthew G. Knepley ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 12638c1d5ab3SHong Zhang if (size == 1) { 1264a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES) 1265a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1266a1b6d50cSHong Zhang #else 12678c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1268a1b6d50cSHong Zhang #endif 12698c1d5ab3SHong Zhang } 127026fbe8dcSKarl Rupp 1271b1301b2eSHong Zhang for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1272c92418ccSAmlan Barua 1273b2b77a04SHong Zhang fftw->p_forward = 0; 1274b2b77a04SHong Zhang fftw->p_backward = 0; 1275b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE; 1276b2b77a04SHong Zhang 1277b2b77a04SHong Zhang if (size == 1) { 1278b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW; 1279b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1280b2b77a04SHong Zhang } else { 1281b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW; 1282b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1283b2b77a04SHong Zhang } 1284b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW; 1285b2b77a04SHong Zhang A->assembled = PETSC_TRUE; 12864a52bad8SHong Zhang A->preallocated = PETSC_TRUE; 128726fbe8dcSKarl Rupp 12882a7a6963SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1289bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1290bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1291b2b77a04SHong Zhang 1292b2b77a04SHong Zhang /* get runtime options */ 1293ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 12945274ed1bSHong Zhang ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 12955274ed1bSHong Zhang if (flg) { 12965274ed1bSHong Zhang fftw->p_flag = iplans[p_flag]; 12975274ed1bSHong Zhang } 12984a52bad8SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 1299b2b77a04SHong Zhang PetscFunctionReturn(0); 1300b2b77a04SHong Zhang } 13013c3a9c75SAmlan Barua 13023c3a9c75SAmlan Barua 13033c3a9c75SAmlan Barua 13043c3a9c75SAmlan Barua 1305