181b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; 281b6088dSJunchao Zhang /* 381b6088dSJunchao Zhang Running example on Summit at OLCF: 481b6088dSJunchao Zhang # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS 581b6088dSJunchao Zhang $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu 69622a0a0SJunchao Zhang Average cudaPointerGetAttributes() time = 0.31 microseconds 781b6088dSJunchao Zhang */ 881b6088dSJunchao Zhang #include <petscsys.h> 981b6088dSJunchao Zhang #include <petscdevice.h> 1081b6088dSJunchao Zhang 1181b6088dSJunchao Zhang int main(int argc,char **argv) 1281b6088dSJunchao Zhang { 139622a0a0SJunchao Zhang PetscInt i,n=4000; 1481b6088dSJunchao Zhang cudaError_t cerr; 1581b6088dSJunchao Zhang PetscScalar **ptrs; 1681b6088dSJunchao Zhang PetscLogDouble tstart,tend,time; 1781b6088dSJunchao Zhang struct cudaPointerAttributes attr; 1881b6088dSJunchao Zhang 19*327415f7SBarry Smith PetscFunctionBeginUser; 209566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 219566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 229622a0a0SJunchao Zhang PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */ 2381b6088dSJunchao Zhang 249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n,&ptrs)); 2581b6088dSJunchao Zhang for (i=0; i<n; i++) { 269566063dSJacob Faibussowitsch if (i%2) PetscCall(PetscMalloc1(i+16,&ptrs[i])); 279566063dSJacob Faibussowitsch else PetscCallCUDA(cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar))); 2881b6088dSJunchao Zhang } 2981b6088dSJunchao Zhang 309566063dSJacob Faibussowitsch PetscCall(PetscTime(&tstart)); 3181b6088dSJunchao Zhang for (i=0; i<n; i++) { 3281b6088dSJunchao Zhang cerr = cudaPointerGetAttributes(&attr,ptrs[i]); 339622a0a0SJunchao Zhang if (cerr) cerr = cudaGetLastError(); 3481b6088dSJunchao Zhang } 359566063dSJacob Faibussowitsch PetscCall(PetscTime(&tend)); 3681b6088dSJunchao Zhang time = (tend-tstart)*1e6/n; 3781b6088dSJunchao Zhang 389566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",time)); 3981b6088dSJunchao Zhang 4081b6088dSJunchao Zhang for (i=0; i<n; i++) { 419566063dSJacob Faibussowitsch if (i%2) PetscCall(PetscFree(ptrs[i])); 429566063dSJacob Faibussowitsch else PetscCallCUDA(cudaFree(ptrs[i])); 4381b6088dSJunchao Zhang } 449566063dSJacob Faibussowitsch PetscCall(PetscFree(ptrs)); 4581b6088dSJunchao Zhang 469566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 47b122ec5aSJacob Faibussowitsch return 0; 4881b6088dSJunchao Zhang } 4981b6088dSJunchao Zhang 5081b6088dSJunchao Zhang /*TEST 5181b6088dSJunchao Zhang build: 5281b6088dSJunchao Zhang requires: cuda 5381b6088dSJunchao Zhang 5481b6088dSJunchao Zhang test: 5581b6088dSJunchao Zhang requires: cuda 5681b6088dSJunchao Zhang args: -n 2 5781b6088dSJunchao Zhang output_file: output/empty.out 5881b6088dSJunchao Zhang filter: grep "DOES_NOT_EXIST" 5981b6088dSJunchao Zhang 6081b6088dSJunchao Zhang TEST*/ 61