1 static char help[] = "Driver for benchmarking SpMV."; 2 3 #include <petscmat.h> 4 #include "cJSON.h" 5 #include "mmloader.h" 6 7 char *read_file(const char *filename) 8 { 9 FILE *file = NULL; 10 long length = 0; 11 char *content = NULL; 12 size_t read_chars = 0; 13 14 /* open in read binary mode */ 15 file = fopen(filename, "rb"); 16 if (file) { 17 /* get the length */ 18 fseek(file, 0, SEEK_END); 19 length = ftell(file); 20 fseek(file, 0, SEEK_SET); 21 /* allocate content buffer */ 22 content = (char *)malloc((size_t)length + sizeof("")); 23 /* read the file into memory */ 24 read_chars = fread(content, sizeof(char), (size_t)length, file); 25 content[read_chars] = '\0'; 26 fclose(file); 27 } 28 return content; 29 } 30 31 void write_file(const char *filename, const char *content) 32 { 33 FILE *file = NULL; 34 file = fopen(filename, "w"); 35 if (file) { fputs(content, file); } 36 fclose(file); 37 } 38 39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 40 { 41 char *content = read_file(inputjsonfile); 42 cJSON *matrix_json = NULL; 43 const cJSON *problem = NULL, *elem = NULL; 44 const cJSON *item = NULL; 45 char **filenames, **groupnames, **matnames; 46 int i, n; 47 if (!content) return 0; 48 matrix_json = cJSON_Parse(content); 49 if (!matrix_json) return 0; 50 n = cJSON_GetArraySize(matrix_json); 51 *nmat = n; 52 filenames = (char **)malloc(sizeof(char *) * n); 53 groupnames = (char **)malloc(sizeof(char *) * n); 54 matnames = (char **)malloc(sizeof(char *) * n); 55 for (i = 0; i < n; i++) { 56 elem = cJSON_GetArrayItem(matrix_json, i); 57 item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 58 filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 59 strcpy(filenames[i], item->valuestring); 60 problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 61 item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 62 groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 63 strcpy(groupnames[i], item->valuestring); 64 item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 65 matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 66 strcpy(matnames[i], item->valuestring); 67 } 68 cJSON_Delete(matrix_json); 69 free(content); 70 *outputfilenames = filenames; 71 *outputgroupnames = groupnames; 72 *outputmatnames = matnames; 73 return 0; 74 } 75 76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 77 { 78 char *content = read_file(inputjsonfile); 79 cJSON *matrix_json = NULL; 80 cJSON *elem = NULL; 81 int i, n; 82 if (!content) return 0; 83 matrix_json = cJSON_Parse(content); 84 if (!matrix_json) return 0; 85 n = cJSON_GetArraySize(matrix_json); 86 for (i = 0; i < n; i++) { 87 cJSON *spmv = NULL; 88 cJSON *format = NULL; 89 elem = cJSON_GetArrayItem(matrix_json, i); 90 spmv = cJSON_GetObjectItem(elem, "spmv"); 91 if (spmv) { 92 format = cJSON_GetObjectItem(spmv, matformat); 93 if (format) { 94 cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 95 cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 96 } else { 97 format = cJSON_CreateObject(); 98 cJSON_AddItemToObject(spmv, matformat, format); 99 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 100 cJSON_AddNumberToObject(format, "repetitions", repetitions); 101 } 102 } else { 103 spmv = cJSON_CreateObject(); 104 cJSON_AddItemToObject(elem, "spmv", spmv); 105 format = cJSON_CreateObject(); 106 cJSON_AddItemToObject(spmv, matformat, format); 107 cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 108 cJSON_AddNumberToObject(format, "repetitions", repetitions); 109 } 110 } 111 free(content); 112 content = cJSON_Print(matrix_json); 113 write_file(inputjsonfile, content); 114 cJSON_Delete(matrix_json); 115 free(content); 116 return 0; 117 } 118 119 /* 120 For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121 The extra CPU copy allows us to destroy the GPU matrix and recreate it effciently 122 in each repetition. As a result, each MatMult call is fresh, and we can capture 123 the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 124 during consecutive calls. 125 */ 126 PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 127 { 128 Mat A2 = NULL; 129 PetscInt i; 130 Vec u; 131 PetscLogDouble vstart = 0, vend = 0; 132 PetscBool isaijcusparse, isaijkokkos; 133 134 PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 135 PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 136 if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA)); 137 if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 138 PetscCall(VecDuplicate(b, &u)); 139 if (time) *time = 0.0; 140 for (i = 0; i < repetitions; i++) { 141 if (use_gpu) { 142 PetscCall(MatDestroy(&A2)); 143 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 144 PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2)); 145 } else A2 = A; 146 /* Timing MatMult */ 147 if (time) PetscCall(PetscTime(&vstart)); 148 149 PetscCall(MatMult(A2, b, u)); 150 151 if (time) { 152 PetscCall(PetscTime(&vend)); 153 *time += (PetscReal)(vend - vstart); 154 } 155 } 156 PetscCall(VecDestroy(&u)); 157 if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 158 return 0; 159 } 160 161 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 162 { 163 PetscLogEvent event; 164 PetscEventPerfInfo eventInfo; 165 //PetscReal gpuflopRate; 166 167 // if (matformat) { 168 // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 169 // } else { 170 // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 171 // } 172 // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 173 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 174 175 PetscCall(PetscLogEventGetId("MatMult", &event)); 176 PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 177 //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 178 // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 179 if (cputime) *cputime = eventInfo.time; 180 #if defined(PETSC_HAVE_DEVICE) 181 if (gputime) *gputime = eventInfo.GpuTime; 182 if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 183 #endif 184 return 0; 185 } 186 187 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 188 { 189 PetscBool iscsr, issell, iscsrkokkos; 190 PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 191 if (iscsr) { 192 if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 193 else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 194 } else { 195 PetscCall(PetscStrcmp(matformat, "sell", &issell)); 196 if (issell) { 197 if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA 198 else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 199 } else { 200 PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 201 if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 202 } 203 } 204 return 0; 205 } 206 207 int main(int argc, char **args) 208 { 209 PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 210 Mat A; 211 Vec b; 212 char jfilename[PETSC_MAX_PATH_LEN]; 213 char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 214 char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 215 char *matformats[5]; 216 char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 217 PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE; 218 PetscViewer fd; 219 PetscReal starting_spmv_time = 0, *spmv_times; 220 221 PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 222 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 223 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 224 if (!flg1) { 225 nformats = 1; 226 PetscCall(PetscStrallocpy("csr", &matformats[0])); 227 } 228 PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 229 PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 230 /* Read matrix and RHS */ 231 PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 232 PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 233 PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 234 PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 235 PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 236 #if !defined(PETSC_HAVE_DEVICE) 237 PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 238 #endif 239 PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 240 if (flg3) { 241 ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 242 PetscCall(PetscCalloc1(nmat, &spmv_times)); 243 } else if (flg2) { 244 PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 245 } else if (flg1) { 246 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 247 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 248 PetscCall(MatSetType(A, MATAIJ)); 249 PetscCall(MatSetFromOptions(A)); 250 PetscCall(MatLoad(A, fd)); 251 PetscCall(PetscViewerDestroy(&fd)); 252 } 253 /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 254 PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 255 if (bflg) { 256 PetscViewer fb; 257 PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 258 PetscCall(VecSetFromOptions(b)); 259 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 260 PetscCall(VecLoad(b, fb)); 261 PetscCall(PetscViewerDestroy(&fb)); 262 } 263 264 for (j = 0; j < nformats; j++) { 265 char *petscmatformat = NULL; 266 MapToPetscMatType(matformats[j], use_gpu, &petscmatformat); 267 PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 268 if (flg3) { // mat names specified in a JSON file 269 for (i = 0; i < nmat; i++) { 270 PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 271 if (!bflg) { 272 PetscCall(MatCreateVecs(A, &b, NULL)); 273 PetscCall(VecSet(b, 1.0)); 274 } 275 PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 276 if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 277 else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 278 PetscCall(MatDestroy(&A)); 279 if (!bflg) PetscCall(VecDestroy(&b)); 280 } 281 UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 282 starting_spmv_time = spmv_times[nmat - 1]; 283 } else { 284 PetscReal spmv_time; 285 if (!bflg) { 286 PetscCall(MatCreateVecs(A, &b, NULL)); 287 PetscCall(VecSet(b, 1.0)); 288 } 289 PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 290 if (!bflg) PetscCall(VecDestroy(&b)); 291 } 292 PetscCall(PetscFree(petscmatformat)); 293 } 294 if (flg3) { 295 for (i = 0; i < nmat; i++) { 296 free(filenames[i]); 297 free(groupnames[i]); 298 free(matnames[i]); 299 } 300 free(filenames); 301 free(groupnames); 302 free(matnames); 303 PetscCall(PetscFree(spmv_times)); 304 } 305 for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 306 if (flg1 || flg2) PetscCall(MatDestroy(&A)); 307 if (bflg) PetscCall(VecDestroy(&b)); 308 PetscCall(PetscFinalize()); 309 return 0; 310 } 311 /*TEST 312 313 build: 314 requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 315 depends: mmloader.c mmio.c cJSON.c 316 317 test: 318 suffix: 1 319 args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 320 321 test: 322 suffix: 2 323 args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 324 output_file: output/bench_spmv_1.out 325 requires: cuda 326 327 TEST*/ 328