xref: /petsc/src/sys/objects/device/tests/ex2cu.cu (revision 81b6088dc7b1e9ba8d4577257a4b96173292dab9)
1*81b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
2*81b6088dSJunchao Zhang /*
3*81b6088dSJunchao Zhang   Running example on Summit at OLCF:
4*81b6088dSJunchao Zhang   # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
5*81b6088dSJunchao Zhang   $ jsrun -n1 -a1 -c7 -g1 -r1  ./ex2cu
6*81b6088dSJunchao Zhang     Average cudaPointerGetAttributes() time = 0.29 microseconds
7*81b6088dSJunchao Zhang */
8*81b6088dSJunchao Zhang #include <petscsys.h>
9*81b6088dSJunchao Zhang #include <petscdevice.h>
10*81b6088dSJunchao Zhang 
11*81b6088dSJunchao Zhang int main(int argc,char **argv)
12*81b6088dSJunchao Zhang {
13*81b6088dSJunchao Zhang   PetscErrorCode               ierr;
14*81b6088dSJunchao Zhang   PetscInt                     i,n=2000;
15*81b6088dSJunchao Zhang   cudaError_t                  cerr;
16*81b6088dSJunchao Zhang   PetscScalar                  **ptrs;
17*81b6088dSJunchao Zhang   PetscLogDouble               tstart,tend,time;
18*81b6088dSJunchao Zhang   struct cudaPointerAttributes attr;
19*81b6088dSJunchao Zhang 
20*81b6088dSJunchao Zhang   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
21*81b6088dSJunchao Zhang   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
22*81b6088dSJunchao Zhang 
23*81b6088dSJunchao Zhang   ierr = PetscMalloc1(n,&ptrs);CHKERRQ(ierr);
24*81b6088dSJunchao Zhang   for (i=0; i<n; i++) {
25*81b6088dSJunchao Zhang     if (i%2) {ierr = PetscMalloc1(i+16,&ptrs[i]);CHKERRQ(ierr);}
26*81b6088dSJunchao Zhang     else {cerr = cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar));CHKERRCUDA(cerr);}
27*81b6088dSJunchao Zhang   }
28*81b6088dSJunchao Zhang 
29*81b6088dSJunchao Zhang   ierr = PetscTime(&tstart);CHKERRQ(ierr);
30*81b6088dSJunchao Zhang   for (i=0; i<n; i++) {
31*81b6088dSJunchao Zhang     cerr = cudaPointerGetAttributes(&attr,ptrs[i]);
32*81b6088dSJunchao Zhang     if (cerr) cudaGetLastError();
33*81b6088dSJunchao Zhang   }
34*81b6088dSJunchao Zhang   ierr = PetscTime(&tend);CHKERRQ(ierr);
35*81b6088dSJunchao Zhang   time = (tend-tstart)*1e6/n;
36*81b6088dSJunchao Zhang 
37*81b6088dSJunchao Zhang   ierr = PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",time);CHKERRQ(ierr);
38*81b6088dSJunchao Zhang 
39*81b6088dSJunchao Zhang   for (i=0; i<n; i++) {
40*81b6088dSJunchao Zhang     if (i%2) {ierr = PetscFree(ptrs[i]);CHKERRQ(ierr);}
41*81b6088dSJunchao Zhang     else {cerr = cudaFree(ptrs[i]);CHKERRCUDA(cerr);}
42*81b6088dSJunchao Zhang   }
43*81b6088dSJunchao Zhang   ierr = PetscFree(ptrs);CHKERRQ(ierr);
44*81b6088dSJunchao Zhang 
45*81b6088dSJunchao Zhang   ierr = PetscFinalize();
46*81b6088dSJunchao Zhang   return ierr;
47*81b6088dSJunchao Zhang }
48*81b6088dSJunchao Zhang 
49*81b6088dSJunchao Zhang /*TEST
50*81b6088dSJunchao Zhang   build:
51*81b6088dSJunchao Zhang     requires: cuda
52*81b6088dSJunchao Zhang 
53*81b6088dSJunchao Zhang   test:
54*81b6088dSJunchao Zhang     requires: cuda
55*81b6088dSJunchao Zhang     args: -n 2
56*81b6088dSJunchao Zhang     output_file: output/empty.out
57*81b6088dSJunchao Zhang     filter: grep "DOES_NOT_EXIST"
58*81b6088dSJunchao Zhang 
59*81b6088dSJunchao Zhang TEST*/
60