1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 133 134 PetscFunctionBeginUser; 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 137 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 138 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 139 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 140 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 141 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 142 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 143 PetscCall(VecDuplicate(b, &u)); 144 if (time) *time = 0.0; 145 for (i = 0; i < repetitions; i++) { 146 if (use_gpu) { 147 PetscCall(MatDestroy(&A2)); 148 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 149 if (issellcuda) { 150 PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2)); 151 PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2)); 152 } else if (issellhip) { 153 PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2)); 154 PetscCall(MatConvert(A2, MATSELLHIP, MAT_INPLACE_MATRIX, &A2)); 155 } else { 156 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 157 } 158 PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL 159 } else A2 = A; 160 /* Timing MatMult */ 161 if (time) PetscCall(PetscTime(&vstart)); 162 163 PetscCall(MatMult(A2, b, u)); 164 165 if (time) { 166 PetscCall(PetscTime(&vend)); 167 *time += (PetscReal)(vend - vstart); 168 } 169 } 170 PetscCall(VecDestroy(&u)); 171 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 172 PetscFunctionReturn(PETSC_SUCCESS); 173 } 174 175 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat) 176 { 177 PetscLogEvent event; 178 Vec u; 179 PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 180 181 PetscFunctionBeginUser; 182 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 183 PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 184 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 185 PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 186 PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 187 if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS); 188 if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 189 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 190 if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 191 PetscCall(VecDuplicate(b, &u)); 192 PetscCall(MatConvert(A, petscmatformat, MAT_INPLACE_MATRIX, &A)); 193 PetscCall(PetscLogEventGetId("MatMult", &event)); 194 PetscCall(PetscLogEventDeactivatePush(event)); 195 PetscCall(MatMult(A, b, u)); 196 PetscCall(PetscLogEventDeactivatePop(event)); 197 PetscCall(VecDestroy(&u)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 202 { 203 PetscLogEvent event; 204 PetscEventPerfInfo eventInfo; 205 // PetscReal gpuflopRate; 206 207 // if (matformat) { 208 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 209 // } else { 210 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 211 // } 212 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 213 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 214 215 PetscFunctionBeginUser; 216 PetscCall(PetscLogEventGetId("MatMult", &event)); 217 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 218 // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 219 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 220 if (cputime) *cputime = eventInfo.time; 221 #if defined(PETSC_HAVE_DEVICE) 222 if (gputime) *gputime = eventInfo.GpuTime; 223 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 224 #endif 225 PetscFunctionReturn(PETSC_SUCCESS); 226 } 227 228 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 229 { 230 PetscBool iscsr, issell, iscsrkokkos; 231 232 PetscFunctionBeginUser; 233 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 234 if (iscsr) { 235 if (use_gpu) { 236 #if defined(PETSC_HAVE_CUDA) 237 PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 238 #endif 239 #if defined(PETSC_HAVE_HIP) 240 PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat)); 241 #endif 242 } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 243 } else { 244 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 245 if (issell) { 246 if (use_gpu) { 247 #if defined(PETSC_HAVE_CUDA) 248 PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat)); 249 #endif 250 #if defined(PETSC_HAVE_HIP) 251 PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat)); 252 #endif 253 } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 254 } else { 255 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 256 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 257 } 258 } 259 PetscFunctionReturn(PETSC_SUCCESS); 260 } 261 262 int main(int argc, char **args) 263 { 264 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 265 Mat A; 266 Vec b; 267 char jfilename[PETSC_MAX_PATH_LEN]; 268 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 269 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 270 char *matformats[5]; 271 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 272 char ordering[256] = MATORDERINGRCM; 273 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 274 IS rowperm = NULL, colperm = NULL; 275 PetscViewer fd; 276 PetscReal starting_spmv_time = 0, *spmv_times; 277 278 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 279 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 280 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 281 if (!flg1) { 282 nformats = 1; 283 PetscCall(PetscStrallocpy("csr", &matformats[0])); 284 } 285 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 286 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 287 /* Read matrix and RHS */ 288 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 289 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 290 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 291 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 292 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 293 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 294 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 295 PetscOptionsEnd(); 296 #if !defined(PETSC_HAVE_DEVICE) 297 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 298 #endif 299 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 300 if (flg3) { 301 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 302 PetscCall(PetscCalloc1(nmat, &spmv_times)); 303 } else if (flg2) { 304 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 305 } else if (flg1) { 306 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 307 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 308 PetscCall(MatSetType(A, MATAIJ)); 309 PetscCall(MatSetFromOptions(A)); 310 PetscCall(MatLoad(A, fd)); 311 PetscCall(PetscViewerDestroy(&fd)); 312 } 313 if (permute) { 314 Mat Aperm; 315 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 316 PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 317 PetscCall(MatDestroy(&A)); 318 A = Aperm; /* Replace original operator with permuted version */ 319 } 320 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 321 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 322 if (bflg) { 323 PetscViewer fb; 324 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 325 PetscCall(VecSetFromOptions(b)); 326 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 327 PetscCall(VecLoad(b, fb)); 328 PetscCall(PetscViewerDestroy(&fb)); 329 } 330 331 for (j = 0; j < nformats; j++) { 332 char *petscmatformat = NULL; 333 PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 334 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 335 if (flg3) { // mat names specified in a JSON file 336 for (i = 0; i < nmat; i++) { 337 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 338 if (!bflg) { 339 PetscCall(MatCreateVecs(A, &b, NULL)); 340 PetscCall(VecSet(b, 1.0)); 341 } 342 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 343 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 344 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 345 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 346 PetscCall(MatDestroy(&A)); 347 if (!bflg) PetscCall(VecDestroy(&b)); 348 } 349 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 350 starting_spmv_time = spmv_times[nmat - 1]; 351 } else { 352 PetscReal spmv_time; 353 if (!bflg) { 354 PetscCall(MatCreateVecs(A, &b, NULL)); 355 PetscCall(VecSet(b, 1.0)); 356 } 357 if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 358 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 359 if (!bflg) PetscCall(VecDestroy(&b)); 360 } 361 PetscCall(PetscFree(petscmatformat)); 362 } 363 if (flg3) { 364 for (i = 0; i < nmat; i++) { 365 free(filenames[i]); 366 free(groupnames[i]); 367 free(matnames[i]); 368 } 369 free(filenames); 370 free(groupnames); 371 free(matnames); 372 PetscCall(PetscFree(spmv_times)); 373 } 374 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 375 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 376 if (bflg) PetscCall(VecDestroy(&b)); 377 PetscCall(ISDestroy(&rowperm)); 378 PetscCall(ISDestroy(&colperm)); 379 PetscCall(PetscFinalize()); 380 return 0; 381 } 382 /*TEST 383 384 build: 385 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 386 depends: mmloader.c mmio.c cJSON.c 387 388 test: 389 suffix: 1 390 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 391 392 test: 393 suffix: 2 394 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 395 output_file: output/bench_spmv_1.out 396 requires: cuda 397 398 test: 399 suffix: 3 400 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 401 output_file: output/bench_spmv_1.out 402 requires: hip 403 404 TEST*/ 405