1*d52a580bSJunchao Zhang static char help[] = "Benchmarking hipPointerGetAttributes() time\n"; 2*d52a580bSJunchao Zhang /* 3*d52a580bSJunchao Zhang Running example on Crusher at OLCF: 4*d52a580bSJunchao Zhang # run with 1 mpi rank (-n1), 32 CPUs (-c32), and map the process to CPU 0 and GPU 0 5*d52a580bSJunchao Zhang $ srun -n1 -c32 --cpu-bind=map_cpu:0 --gpus-per-node=8 --gpu-bind=map_gpu:0 ./ex2hip 6*d52a580bSJunchao Zhang Average hipPointerGetAttributes() time = 0.24 microseconds 7*d52a580bSJunchao Zhang */ 8*d52a580bSJunchao Zhang #include <petscsys.h> 9*d52a580bSJunchao Zhang #include <petscdevice_hip.h> 10*d52a580bSJunchao Zhang 11*d52a580bSJunchao Zhang int main(int argc, char **argv) 12*d52a580bSJunchao Zhang { 13*d52a580bSJunchao Zhang PetscInt i, n = 4000; 14*d52a580bSJunchao Zhang hipError_t cerr; 15*d52a580bSJunchao Zhang PetscScalar **ptrs; 16*d52a580bSJunchao Zhang PetscLogDouble tstart, tend, time; 17*d52a580bSJunchao Zhang hipPointerAttribute_t attr; 18*d52a580bSJunchao Zhang 19*d52a580bSJunchao Zhang PetscFunctionBeginUser; 20*d52a580bSJunchao Zhang PetscCall(PetscInitialize(&argc, &argv, nullptr, help)); 21*d52a580bSJunchao Zhang PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 22*d52a580bSJunchao Zhang PetscCallHIP(hipStreamSynchronize(NULL)); /* Initialize HIP runtime to get more accurate timing below */ 23*d52a580bSJunchao Zhang 24*d52a580bSJunchao Zhang PetscCall(PetscMalloc1(n, &ptrs)); 25*d52a580bSJunchao Zhang for (i = 0; i < n; i++) { 26*d52a580bSJunchao Zhang if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i])); 27*d52a580bSJunchao Zhang else PetscCallHIP(hipMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar))); 28*d52a580bSJunchao Zhang } 29*d52a580bSJunchao Zhang 30*d52a580bSJunchao Zhang PetscCall(PetscTime(&tstart)); 31*d52a580bSJunchao Zhang for (i = 0; i < n; i++) { 32*d52a580bSJunchao Zhang cerr = hipPointerGetAttributes(&attr, ptrs[i]); 33*d52a580bSJunchao Zhang if (cerr) cerr = hipGetLastError(); 34*d52a580bSJunchao Zhang } 35*d52a580bSJunchao Zhang PetscCall(PetscTime(&tend)); 36*d52a580bSJunchao Zhang time = (tend - tstart) * 1e6 / n; 37*d52a580bSJunchao Zhang 38*d52a580bSJunchao Zhang PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average hipPointerGetAttributes() time = %.2f microseconds\n", time)); 39*d52a580bSJunchao Zhang 40*d52a580bSJunchao Zhang for (i = 0; i < n; i++) { 41*d52a580bSJunchao Zhang if (i % 2) PetscCall(PetscFree(ptrs[i])); 42*d52a580bSJunchao Zhang else PetscCallHIP(hipFree(ptrs[i])); 43*d52a580bSJunchao Zhang } 44*d52a580bSJunchao Zhang PetscCall(PetscFree(ptrs)); 45*d52a580bSJunchao Zhang PetscCall(PetscFinalize()); 46*d52a580bSJunchao Zhang return 0; 47*d52a580bSJunchao Zhang } 48*d52a580bSJunchao Zhang 49*d52a580bSJunchao Zhang /*TEST 50*d52a580bSJunchao Zhang build: 51*d52a580bSJunchao Zhang requires: hip 52*d52a580bSJunchao Zhang 53*d52a580bSJunchao Zhang test: 54*d52a580bSJunchao Zhang requires: hip 55*d52a580bSJunchao Zhang args: -n 2 56*d52a580bSJunchao Zhang output_file: output/empty.out 57*d52a580bSJunchao Zhang filter: grep "DOES_NOT_EXIST" 58*d52a580bSJunchao Zhang 59*d52a580bSJunchao Zhang TEST*/ 60