xref: /petsc/src/dm/tests/ex2k.kokkos.cxx (revision b45e3bf4ff73d80a20c3202b6cd9f79d2f2d3efe)
1 static char help[] = "Benchmarking various accessing methods of DMDA vectors on host\n\n";
2 
3 /*
4   On a machine with AMD EPYC-7452 CPUs, we got this data using one MPI rank and a serial-only Kokkos:
5            Time (sec.), on Mar. 1, 2022
6   ------------------------------------------
7   n     PETSc          C          Kokkos
8   ------------------------------------------
9   32    4.6464E-05  4.7451E-05  1.6880E-04
10   64    2.5654E-04  2.5164E-04  5.6780E-04
11   128   1.9383E-03  1.8878E-03  4.7938E-03
12   256   1.4450E-02  1.3619E-02  3.7778E-02
13   512   1.1580E-01  1.1551E-01  2.8428E-01
14   1024  1.4179E+00  1.3772E+00  2.2873E+00
15 
16   Overall, C is -2% ~ 5% faster than PETSc. But Kokkos is 1.6~3.6x slower than PETSc
17 */
18 
19 #include <petscdmda_kokkos.hpp>
20 #include <petscdm.h>
21 #include <petscdmda.h>
22 
23 using namespace Kokkos;
24 using PetscScalarKokkosOffsetView3D      = Kokkos::Experimental::OffsetView<PetscScalar***,Kokkos::LayoutRight,Kokkos::HostSpace>;
25 using ConstPetscScalarKokkosOffsetView3D = Kokkos::Experimental::OffsetView<const PetscScalar***, Kokkos::LayoutRight,Kokkos::HostSpace>;
26 
27 /* PETSc multi-dimensional array access */
28 static PetscErrorCode Update1(DM da,const PetscScalar ***__restrict__ x1, PetscScalar ***__restrict__ y1, PetscInt nwarm,PetscInt nloop,PetscLogDouble *avgTime)
29 {
30   PetscErrorCode    ierr;
31   PetscInt          it,i,j,k;
32   PetscLogDouble    tstart=0.0,tend;
33   PetscInt          xm,ym,zm,xs,ys,zs,gxm,gym,gzm,gxs,gys,gzs;
34 
35   PetscFunctionBegin;
36   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
37   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gxm,&gym,&gzm);CHKERRQ(ierr);
38   for (it=0; it<nwarm+nloop; it++) {
39     if (it == nwarm) {ierr = PetscTime(&tstart);CHKERRQ(ierr);}
40     for (k=zs; k<zs+zm; k++) {
41       for (j=ys; j<ys+ym; j++) {
42         for (i=xs; i<xs+xm; i++) {
43           y1[k][j][i] = 6*x1[k][j][i] - x1[k-1][j][i] - x1[k][j-1][i] - x1[k][j][i-1]
44                                       - x1[k+1][j][i] - x1[k][j+1][i] - x1[k][j][i+1];
45         }
46       }
47     }
48   }
49   ierr    = PetscTime(&tend);CHKERRQ(ierr);
50   *avgTime = (tend - tstart)/nloop;
51   PetscFunctionReturn(ierr);
52 }
53 
54 /* C multi-dimensional array access */
55 static PetscErrorCode Update2(DM da,const PetscScalar *__restrict__ x2, PetscScalar *__restrict__ y2, PetscInt nwarm,PetscInt nloop,PetscLogDouble *avgTime)
56 {
57   PetscErrorCode    ierr;
58   PetscInt          it,i,j,k;
59   PetscLogDouble    tstart=0.0,tend;
60   PetscInt          xm,ym,zm,xs,ys,zs,gxm,gym,gzm,gxs,gys,gzs;
61 
62   PetscFunctionBegin;
63   ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
64   ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gxm,&gym,&gzm);CHKERRQ(ierr);
65 #define X2(k,j,i) x2[(k-gzs)*gym*gxm+(j-gys)*gxm+(i-gxs)]
66 #define Y2(k,j,i) y2[(k-zs)*ym*xm+(j-ys)*xm+(i-xs)]
67   for (it=0; it<nwarm+nloop; it++) {
68     if (it == nwarm) {ierr = PetscTime(&tstart);CHKERRQ(ierr);}
69     for (k=zs; k<zs+zm; k++) {
70       for (j=ys; j<ys+ym; j++) {
71         for (i=xs; i<xs+xm; i++) {
72           Y2(k,j,i) = 6*X2(k,j,i) - X2(k-1,j,i) - X2(k,j-1,i) - X2(k,j,i-1)
73                                   - X2(k+1,j,i) - X2(k,j+1,i) - X2(k,j,i+1);
74         }
75       }
76     }
77   }
78   ierr    = PetscTime(&tend);CHKERRQ(ierr);
79   *avgTime = (tend - tstart)/nloop;
80 #undef X2
81 #undef Y2
82   PetscFunctionReturn(ierr);
83 }
84 
85 int main(int argc,char **argv)
86 {
87   PetscErrorCode                       ierr;
88   DM                                   da;
89   PetscInt                             xm,ym,zm,xs,ys,zs,gxm,gym,gzm,gxs,gys,gzs;
90   PetscInt                             dof = 1,sw = 1;
91   DMBoundaryType                       bx = DM_BOUNDARY_PERIODIC,by = DM_BOUNDARY_PERIODIC,bz = DM_BOUNDARY_PERIODIC;
92   DMDAStencilType                      st = DMDA_STENCIL_STAR;
93   Vec                                  x,y; /* local/global vectors of the da */
94   PetscRandom                          rctx;
95   const PetscScalar                    ***x1;
96   PetscScalar                          ***y1; /* arrays of g, ll */
97   const PetscScalar                    *x2;
98   PetscScalar                          *y2;
99   ConstPetscScalarKokkosOffsetView3D   x3;
100   PetscScalarKokkosOffsetView3D        y3;
101   PetscLogDouble                       tstart = 0.0,tend = 0.0,avgTime = 0.0;
102   PetscInt                             nwarm = 2, nloop = 10;
103   PetscInt                             min = 32, max = 32*8; /* min and max sizes of the grids to sample */
104 
105   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
106   ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
107   ierr = PetscOptionsGetInt(NULL,NULL,"-min",&min,NULL);CHKERRQ(ierr);
108   ierr = PetscOptionsGetInt(NULL,NULL,"-max",&max,NULL);CHKERRQ(ierr);
109 
110   for (PetscInt len=min; len<=max; len=len*2) {
111     ierr = DMDACreate3d(PETSC_COMM_WORLD,bx,by,bz,st,len,len,len,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,dof,sw,0,0,0,&da);CHKERRQ(ierr);
112     ierr = DMSetFromOptions(da);CHKERRQ(ierr);
113     ierr = DMSetUp(da);CHKERRQ(ierr);
114 
115     ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
116     ierr = DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gxm,&gym,&gzm);CHKERRQ(ierr);
117     ierr = DMCreateLocalVector(da,&x);CHKERRQ(ierr); /* Create local x and global y */
118     ierr = DMCreateGlobalVector(da,&y);CHKERRQ(ierr);
119 
120     /* Access with petsc multi-dimensional arrays */
121     ierr = VecSetRandom(x,rctx);CHKERRQ(ierr);
122     ierr = VecSet(y,0.0);CHKERRQ(ierr);
123     ierr = DMDAVecGetArrayRead(da,x,&x1);CHKERRQ(ierr);
124     ierr = DMDAVecGetArray(da,y,&y1);CHKERRQ(ierr);
125     ierr = Update1(da,x1,y1,nwarm,nloop,&avgTime);CHKERRQ(ierr);
126     ierr = DMDAVecRestoreArrayRead(da,x,&x1);CHKERRQ(ierr);
127     ierr = DMDAVecRestoreArray(da,y,&y1);CHKERRQ(ierr);
128     ierr = PetscTime(&tend);CHKERRQ(ierr);
129     ierr = PetscPrintf(PETSC_COMM_WORLD,"%4d^3 -- PETSc average time  = %e\n",len,avgTime);CHKERRQ(ierr);
130 
131     /* Access with C multi-dimensional arrays */
132     ierr = VecSetRandom(x,rctx);CHKERRQ(ierr);
133     ierr = VecSet(y,0.0);CHKERRQ(ierr);
134     ierr = VecGetArrayRead(x,&x2);CHKERRQ(ierr);
135     ierr = VecGetArray(y,&y2);CHKERRQ(ierr);
136     ierr = Update2(da,x2,y2,nwarm,nloop,&avgTime);CHKERRQ(ierr);
137     ierr = VecRestoreArrayRead(x,&x2);CHKERRQ(ierr);
138     ierr = VecRestoreArray(y,&y2);CHKERRQ(ierr);
139     ierr = PetscPrintf(PETSC_COMM_WORLD,"%4d^3 -- C average time      = %e\n",len,avgTime);CHKERRQ(ierr);
140 
141     /* Access with Kokkos multi-dimensional OffsetViews */
142     ierr = VecSet(y,0.0);CHKERRQ(ierr);
143     ierr = VecSetRandom(x,rctx);CHKERRQ(ierr);
144     ierr = DMDAVecGetKokkosOffsetView(da,x,&x3);CHKERRQ(ierr);
145     ierr = DMDAVecGetKokkosOffsetView(da,y,&y3);CHKERRQ(ierr);
146 
147     for (PetscInt it=0; it<nwarm+nloop; it++) {
148       if (it == nwarm) {ierr = PetscTime(&tstart);CHKERRQ(ierr);}
149       Kokkos::parallel_for("stencil",MDRangePolicy<Kokkos::DefaultHostExecutionSpace,Rank<3,Iterate::Right,Iterate::Right>>({zs,ys,xs},{zs+zm,ys+ym,xs+xm}),
150         KOKKOS_LAMBDA(PetscInt k,PetscInt j,PetscInt i) {
151         y3(k,j,i) = 6*x3(k,j,i) - x3(k-1,j,i) - x3(k,j-1,i) - x3(k,j,i-1)
152                                 - x3(k+1,j,i) - x3(k,j+1,i) - x3(k,j,i+1);
153       });
154     }
155     ierr = PetscTime(&tend);CHKERRQ(ierr);
156     ierr = DMDAVecRestoreKokkosOffsetView(da,x,&x3);CHKERRQ(ierr);
157     ierr = DMDAVecRestoreKokkosOffsetView(da,y,&y3);CHKERRQ(ierr);
158     avgTime = (tend - tstart)/nloop;
159     ierr = PetscPrintf(PETSC_COMM_WORLD,"%4d^3 -- Kokkos average time = %e\n",len,avgTime);CHKERRQ(ierr);
160 
161     ierr = VecDestroy(&x);CHKERRQ(ierr);
162     ierr = VecDestroy(&y);CHKERRQ(ierr);
163     ierr = DMDestroy(&da);CHKERRQ(ierr);
164   }
165   ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
166   ierr = PetscFinalize();
167   return ierr;
168 }
169 
170 /*TEST
171   build:
172     requires: kokkos_kernels
173 
174   test:
175     suffix: 1
176     requires: kokkos_kernels
177     args: -min 32 -max 32 -dm_vec_type kokkos
178     filter: grep -v "time"
179 
180 TEST*/
181