static char help[] = "Benchmarking various accessing methods of DMDA vectors on host\n\n"; /* On a machine with AMD EPYC-7452 CPUs, we got this data using one MPI rank and a serial-only Kokkos: Time (sec.), on Mar. 1, 2022 ------------------------------------------ n PETSc C Kokkos ------------------------------------------ 32 4.6464E-05 4.7451E-05 1.6880E-04 64 2.5654E-04 2.5164E-04 5.6780E-04 128 1.9383E-03 1.8878E-03 4.7938E-03 256 1.4450E-02 1.3619E-02 3.7778E-02 512 1.1580E-01 1.1551E-01 2.8428E-01 1024 1.4179E+00 1.3772E+00 2.2873E+00 Overall, C is -2% ~ 5% faster than PETSc. But Kokkos is 1.6~3.6x slower than PETSc */ #include #include #include using namespace Kokkos; using PetscScalarKokkosOffsetView3D = Kokkos::Experimental::OffsetView; using ConstPetscScalarKokkosOffsetView3D = Kokkos::Experimental::OffsetView; /* PETSc multi-dimensional array access */ static PetscErrorCode Update1(DM da,const PetscScalar ***__restrict__ x1, PetscScalar ***__restrict__ y1, PetscInt nwarm,PetscInt nloop,PetscLogDouble *avgTime) { PetscErrorCode ierr; PetscInt it,i,j,k; PetscLogDouble tstart=0.0,tend; PetscInt xm,ym,zm,xs,ys,zs,gxm,gym,gzm,gxs,gys,gzs; PetscFunctionBegin; ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr); ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gxm,&gym,&gzm);CHKERRQ(ierr); for (it=0; it>({zs,ys,xs},{zs+zm,ys+ym,xs+xm}), KOKKOS_LAMBDA(PetscInt k,PetscInt j,PetscInt i) { y3(k,j,i) = 6*x3(k,j,i) - x3(k-1,j,i) - x3(k,j-1,i) - x3(k,j,i-1) - x3(k+1,j,i) - x3(k,j+1,i) - x3(k,j,i+1); }); } ierr = PetscTime(&tend);CHKERRQ(ierr); ierr = DMDAVecRestoreKokkosOffsetView(da,x,&x3);CHKERRQ(ierr); ierr = DMDAVecRestoreKokkosOffsetView(da,y,&y3);CHKERRQ(ierr); avgTime = (tend - tstart)/nloop; ierr = PetscPrintf(PETSC_COMM_WORLD,"%4d^3 -- Kokkos average time = %e\n",len,avgTime);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); ierr = DMDestroy(&da);CHKERRQ(ierr); } ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; } /*TEST build: requires: kokkos_kernels test: suffix: 1 requires: kokkos_kernels args: -min 32 -max 32 -dm_vec_type kokkos filter: grep -v "time" TEST*/