181b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; 281b6088dSJunchao Zhang /* 381b6088dSJunchao Zhang Running example on Summit at OLCF: 481b6088dSJunchao Zhang # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS 581b6088dSJunchao Zhang $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu 69622a0a0SJunchao Zhang Average cudaPointerGetAttributes() time = 0.31 microseconds 781b6088dSJunchao Zhang */ 881b6088dSJunchao Zhang #include <petscsys.h> 9*0e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h> 1081b6088dSJunchao Zhang 119371c9d4SSatish Balay int main(int argc, char **argv) { 129622a0a0SJunchao Zhang PetscInt i, n = 4000; 1381b6088dSJunchao Zhang cudaError_t cerr; 1481b6088dSJunchao Zhang PetscScalar **ptrs; 1581b6088dSJunchao Zhang PetscLogDouble tstart, tend, time; 1681b6088dSJunchao Zhang struct cudaPointerAttributes attr; 1781b6088dSJunchao Zhang 18327415f7SBarry Smith PetscFunctionBeginUser; 199566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 209566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 219622a0a0SJunchao Zhang PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */ 2281b6088dSJunchao Zhang 239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &ptrs)); 2481b6088dSJunchao Zhang for (i = 0; i < n; i++) { 259566063dSJacob Faibussowitsch if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i])); 269566063dSJacob Faibussowitsch else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar))); 2781b6088dSJunchao Zhang } 2881b6088dSJunchao Zhang 299566063dSJacob Faibussowitsch PetscCall(PetscTime(&tstart)); 3081b6088dSJunchao Zhang for (i = 0; i < n; i++) { 3181b6088dSJunchao Zhang cerr = cudaPointerGetAttributes(&attr, ptrs[i]); 329622a0a0SJunchao Zhang if (cerr) cerr = cudaGetLastError(); 3381b6088dSJunchao Zhang } 349566063dSJacob Faibussowitsch PetscCall(PetscTime(&tend)); 3581b6088dSJunchao Zhang time = (tend - tstart) * 1e6 / n; 3681b6088dSJunchao Zhang 379566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", time)); 3881b6088dSJunchao Zhang 3981b6088dSJunchao Zhang for (i = 0; i < n; i++) { 409566063dSJacob Faibussowitsch if (i % 2) PetscCall(PetscFree(ptrs[i])); 419566063dSJacob Faibussowitsch else PetscCallCUDA(cudaFree(ptrs[i])); 4281b6088dSJunchao Zhang } 439566063dSJacob Faibussowitsch PetscCall(PetscFree(ptrs)); 4481b6088dSJunchao Zhang 459566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 46b122ec5aSJacob Faibussowitsch return 0; 4781b6088dSJunchao Zhang } 4881b6088dSJunchao Zhang 4981b6088dSJunchao Zhang /*TEST 5081b6088dSJunchao Zhang build: 5181b6088dSJunchao Zhang requires: cuda 5281b6088dSJunchao Zhang 5381b6088dSJunchao Zhang test: 5481b6088dSJunchao Zhang requires: cuda 5581b6088dSJunchao Zhang args: -n 2 5681b6088dSJunchao Zhang output_file: output/empty.out 5781b6088dSJunchao Zhang filter: grep "DOES_NOT_EXIST" 5881b6088dSJunchao Zhang 5981b6088dSJunchao Zhang TEST*/ 60