1*81b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; 2*81b6088dSJunchao Zhang /* 3*81b6088dSJunchao Zhang Running example on Summit at OLCF: 4*81b6088dSJunchao Zhang # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS 5*81b6088dSJunchao Zhang $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu 6*81b6088dSJunchao Zhang Average cudaPointerGetAttributes() time = 0.29 microseconds 7*81b6088dSJunchao Zhang */ 8*81b6088dSJunchao Zhang #include <petscsys.h> 9*81b6088dSJunchao Zhang #include <petscdevice.h> 10*81b6088dSJunchao Zhang 11*81b6088dSJunchao Zhang int main(int argc,char **argv) 12*81b6088dSJunchao Zhang { 13*81b6088dSJunchao Zhang PetscErrorCode ierr; 14*81b6088dSJunchao Zhang PetscInt i,n=2000; 15*81b6088dSJunchao Zhang cudaError_t cerr; 16*81b6088dSJunchao Zhang PetscScalar **ptrs; 17*81b6088dSJunchao Zhang PetscLogDouble tstart,tend,time; 18*81b6088dSJunchao Zhang struct cudaPointerAttributes attr; 19*81b6088dSJunchao Zhang 20*81b6088dSJunchao Zhang ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 21*81b6088dSJunchao Zhang ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 22*81b6088dSJunchao Zhang 23*81b6088dSJunchao Zhang ierr = PetscMalloc1(n,&ptrs);CHKERRQ(ierr); 24*81b6088dSJunchao Zhang for (i=0; i<n; i++) { 25*81b6088dSJunchao Zhang if (i%2) {ierr = PetscMalloc1(i+16,&ptrs[i]);CHKERRQ(ierr);} 26*81b6088dSJunchao Zhang else {cerr = cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar));CHKERRCUDA(cerr);} 27*81b6088dSJunchao Zhang } 28*81b6088dSJunchao Zhang 29*81b6088dSJunchao Zhang ierr = PetscTime(&tstart);CHKERRQ(ierr); 30*81b6088dSJunchao Zhang for (i=0; i<n; i++) { 31*81b6088dSJunchao Zhang cerr = cudaPointerGetAttributes(&attr,ptrs[i]); 32*81b6088dSJunchao Zhang if (cerr) cudaGetLastError(); 33*81b6088dSJunchao Zhang } 34*81b6088dSJunchao Zhang ierr = PetscTime(&tend);CHKERRQ(ierr); 35*81b6088dSJunchao Zhang time = (tend-tstart)*1e6/n; 36*81b6088dSJunchao Zhang 37*81b6088dSJunchao Zhang ierr = PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",time);CHKERRQ(ierr); 38*81b6088dSJunchao Zhang 39*81b6088dSJunchao Zhang for (i=0; i<n; i++) { 40*81b6088dSJunchao Zhang if (i%2) {ierr = PetscFree(ptrs[i]);CHKERRQ(ierr);} 41*81b6088dSJunchao Zhang else {cerr = cudaFree(ptrs[i]);CHKERRCUDA(cerr);} 42*81b6088dSJunchao Zhang } 43*81b6088dSJunchao Zhang ierr = PetscFree(ptrs);CHKERRQ(ierr); 44*81b6088dSJunchao Zhang 45*81b6088dSJunchao Zhang ierr = PetscFinalize(); 46*81b6088dSJunchao Zhang return ierr; 47*81b6088dSJunchao Zhang } 48*81b6088dSJunchao Zhang 49*81b6088dSJunchao Zhang /*TEST 50*81b6088dSJunchao Zhang build: 51*81b6088dSJunchao Zhang requires: cuda 52*81b6088dSJunchao Zhang 53*81b6088dSJunchao Zhang test: 54*81b6088dSJunchao Zhang requires: cuda 55*81b6088dSJunchao Zhang args: -n 2 56*81b6088dSJunchao Zhang output_file: output/empty.out 57*81b6088dSJunchao Zhang filter: grep "DOES_NOT_EXIST" 58*81b6088dSJunchao Zhang 59*81b6088dSJunchao Zhang TEST*/ 60