189928cc5SHong Zhang static char help[] = "Driver for benchmarking SpMV."; 289928cc5SHong Zhang 389928cc5SHong Zhang #include <petscmat.h> 489928cc5SHong Zhang #include "cJSON.h" 589928cc5SHong Zhang #include "mmloader.h" 689928cc5SHong Zhang 789928cc5SHong Zhang char *read_file(const char *filename) 889928cc5SHong Zhang { 989928cc5SHong Zhang FILE *file = NULL; 1089928cc5SHong Zhang long length = 0; 1189928cc5SHong Zhang char *content = NULL; 1289928cc5SHong Zhang size_t read_chars = 0; 1389928cc5SHong Zhang 1489928cc5SHong Zhang /* open in read binary mode */ 1589928cc5SHong Zhang file = fopen(filename, "rb"); 1689928cc5SHong Zhang if (file) { 1789928cc5SHong Zhang /* get the length */ 1889928cc5SHong Zhang fseek(file, 0, SEEK_END); 1989928cc5SHong Zhang length = ftell(file); 2089928cc5SHong Zhang fseek(file, 0, SEEK_SET); 2189928cc5SHong Zhang /* allocate content buffer */ 2289928cc5SHong Zhang content = (char *)malloc((size_t)length + sizeof("")); 2389928cc5SHong Zhang /* read the file into memory */ 2489928cc5SHong Zhang read_chars = fread(content, sizeof(char), (size_t)length, file); 2589928cc5SHong Zhang content[read_chars] = '\0'; 2689928cc5SHong Zhang fclose(file); 2789928cc5SHong Zhang } 2889928cc5SHong Zhang return content; 2989928cc5SHong Zhang } 3089928cc5SHong Zhang 3189928cc5SHong Zhang void write_file(const char *filename, const char *content) 3289928cc5SHong Zhang { 3389928cc5SHong Zhang FILE *file = NULL; 3489928cc5SHong Zhang file = fopen(filename, "w"); 3589928cc5SHong Zhang if (file) { fputs(content, file); } 3689928cc5SHong Zhang fclose(file); 3789928cc5SHong Zhang } 3889928cc5SHong Zhang 3989928cc5SHong Zhang int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat) 4089928cc5SHong Zhang { 4189928cc5SHong Zhang char *content = read_file(inputjsonfile); 4289928cc5SHong Zhang cJSON *matrix_json = NULL; 4389928cc5SHong Zhang const cJSON *problem = NULL, *elem = NULL; 4489928cc5SHong Zhang const cJSON *item = NULL; 4589928cc5SHong Zhang char **filenames, **groupnames, **matnames; 4689928cc5SHong Zhang int i, n; 4789928cc5SHong Zhang if (!content) return 0; 4889928cc5SHong Zhang matrix_json = cJSON_Parse(content); 4989928cc5SHong Zhang if (!matrix_json) return 0; 5089928cc5SHong Zhang n = cJSON_GetArraySize(matrix_json); 5189928cc5SHong Zhang *nmat = n; 5289928cc5SHong Zhang filenames = (char **)malloc(sizeof(char *) * n); 5389928cc5SHong Zhang groupnames = (char **)malloc(sizeof(char *) * n); 5489928cc5SHong Zhang matnames = (char **)malloc(sizeof(char *) * n); 5589928cc5SHong Zhang for (i = 0; i < n; i++) { 5689928cc5SHong Zhang elem = cJSON_GetArrayItem(matrix_json, i); 5789928cc5SHong Zhang item = cJSON_GetObjectItemCaseSensitive(elem, "filename"); 5889928cc5SHong Zhang filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1)); 5989928cc5SHong Zhang strcpy(filenames[i], item->valuestring); 6089928cc5SHong Zhang problem = cJSON_GetObjectItemCaseSensitive(elem, "problem"); 6189928cc5SHong Zhang item = cJSON_GetObjectItemCaseSensitive(problem, "group"); 6289928cc5SHong Zhang groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 6389928cc5SHong Zhang strcpy(groupnames[i], item->valuestring); 6489928cc5SHong Zhang item = cJSON_GetObjectItemCaseSensitive(problem, "name"); 6589928cc5SHong Zhang matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1); 6689928cc5SHong Zhang strcpy(matnames[i], item->valuestring); 6789928cc5SHong Zhang } 6889928cc5SHong Zhang cJSON_Delete(matrix_json); 6989928cc5SHong Zhang free(content); 7089928cc5SHong Zhang *outputfilenames = filenames; 7189928cc5SHong Zhang *outputgroupnames = groupnames; 7289928cc5SHong Zhang *outputmatnames = matnames; 7389928cc5SHong Zhang return 0; 7489928cc5SHong Zhang } 7589928cc5SHong Zhang 7689928cc5SHong Zhang int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions) 7789928cc5SHong Zhang { 7889928cc5SHong Zhang char *content = read_file(inputjsonfile); 7989928cc5SHong Zhang cJSON *matrix_json = NULL; 8089928cc5SHong Zhang cJSON *elem = NULL; 8189928cc5SHong Zhang int i, n; 8289928cc5SHong Zhang if (!content) return 0; 8389928cc5SHong Zhang matrix_json = cJSON_Parse(content); 8489928cc5SHong Zhang if (!matrix_json) return 0; 8589928cc5SHong Zhang n = cJSON_GetArraySize(matrix_json); 8689928cc5SHong Zhang for (i = 0; i < n; i++) { 8789928cc5SHong Zhang cJSON *spmv = NULL; 8889928cc5SHong Zhang cJSON *format = NULL; 8989928cc5SHong Zhang elem = cJSON_GetArrayItem(matrix_json, i); 9089928cc5SHong Zhang spmv = cJSON_GetObjectItem(elem, "spmv"); 9189928cc5SHong Zhang if (spmv) { 9289928cc5SHong Zhang format = cJSON_GetObjectItem(spmv, matformat); 9389928cc5SHong Zhang if (format) { 9489928cc5SHong Zhang cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 9589928cc5SHong Zhang cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions); 9689928cc5SHong Zhang } else { 9789928cc5SHong Zhang format = cJSON_CreateObject(); 9889928cc5SHong Zhang cJSON_AddItemToObject(spmv, matformat, format); 9989928cc5SHong Zhang cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 10089928cc5SHong Zhang cJSON_AddNumberToObject(format, "repetitions", repetitions); 10189928cc5SHong Zhang } 10289928cc5SHong Zhang } else { 10389928cc5SHong Zhang spmv = cJSON_CreateObject(); 10489928cc5SHong Zhang cJSON_AddItemToObject(elem, "spmv", spmv); 10589928cc5SHong Zhang format = cJSON_CreateObject(); 10689928cc5SHong Zhang cJSON_AddItemToObject(spmv, matformat, format); 10789928cc5SHong Zhang cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions); 10889928cc5SHong Zhang cJSON_AddNumberToObject(format, "repetitions", repetitions); 10989928cc5SHong Zhang } 11089928cc5SHong Zhang } 11189928cc5SHong Zhang free(content); 11289928cc5SHong Zhang content = cJSON_Print(matrix_json); 11389928cc5SHong Zhang write_file(inputjsonfile, content); 11489928cc5SHong Zhang cJSON_Delete(matrix_json); 11589928cc5SHong Zhang free(content); 11689928cc5SHong Zhang return 0; 11789928cc5SHong Zhang } 11889928cc5SHong Zhang 11989928cc5SHong Zhang /* 12089928cc5SHong Zhang For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU. 121aaa8cc7dSPierre Jolivet The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently 12289928cc5SHong Zhang in each repetition. As a result, each MatMult call is fresh, and we can capture 12389928cc5SHong Zhang the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect 12489928cc5SHong Zhang during consecutive calls. 12589928cc5SHong Zhang */ 12689928cc5SHong Zhang PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions) 12789928cc5SHong Zhang { 12889928cc5SHong Zhang Mat A2 = NULL; 12989928cc5SHong Zhang PetscInt i; 13089928cc5SHong Zhang Vec u; 13189928cc5SHong Zhang PetscLogDouble vstart = 0, vend = 0; 132773bf0f6SHong Zhang PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 13389928cc5SHong Zhang 1343ba16761SJacob Faibussowitsch PetscFunctionBeginUser; 13589928cc5SHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 136773bf0f6SHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 13789928cc5SHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 138d4fb9cc0SHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 139773bf0f6SHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 140d4fb9cc0SHong Zhang if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 14189928cc5SHong Zhang if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 142773bf0f6SHong Zhang if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 14389928cc5SHong Zhang PetscCall(VecDuplicate(b, &u)); 14489928cc5SHong Zhang if (time) *time = 0.0; 14589928cc5SHong Zhang for (i = 0; i < repetitions; i++) { 14689928cc5SHong Zhang if (use_gpu) { 14789928cc5SHong Zhang PetscCall(MatDestroy(&A2)); 14889928cc5SHong Zhang PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 149*b79e0553SHong Zhang PetscCall(MatSetType(A2, petscmatformat)); 150f640565fSHong Zhang PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL 15189928cc5SHong Zhang } else A2 = A; 15289928cc5SHong Zhang /* Timing MatMult */ 15389928cc5SHong Zhang if (time) PetscCall(PetscTime(&vstart)); 15489928cc5SHong Zhang 15589928cc5SHong Zhang PetscCall(MatMult(A2, b, u)); 15689928cc5SHong Zhang 15789928cc5SHong Zhang if (time) { 15889928cc5SHong Zhang PetscCall(PetscTime(&vend)); 15989928cc5SHong Zhang *time += (PetscReal)(vend - vstart); 16089928cc5SHong Zhang } 16189928cc5SHong Zhang } 16289928cc5SHong Zhang PetscCall(VecDestroy(&u)); 16389928cc5SHong Zhang if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2)); 1643ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 16589928cc5SHong Zhang } 16689928cc5SHong Zhang 167f640565fSHong Zhang PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat) 168f640565fSHong Zhang { 169*b79e0553SHong Zhang Mat A2 = NULL; 170f640565fSHong Zhang PetscLogEvent event; 171f640565fSHong Zhang Vec u; 172f640565fSHong Zhang PetscBool isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip; 173f640565fSHong Zhang 174f640565fSHong Zhang PetscFunctionBeginUser; 175f640565fSHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse)); 176f640565fSHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse)); 177f640565fSHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos)); 178f640565fSHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda)); 179f640565fSHong Zhang PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip)); 180f640565fSHong Zhang if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS); 181f640565fSHong Zhang if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA)); 182f640565fSHong Zhang if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS)); 183f640565fSHong Zhang if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP)); 184f640565fSHong Zhang PetscCall(VecDuplicate(b, &u)); 185*b79e0553SHong Zhang PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2)); 186*b79e0553SHong Zhang PetscCall(MatSetType(A2, petscmatformat)); 187f640565fSHong Zhang PetscCall(PetscLogEventGetId("MatMult", &event)); 188f640565fSHong Zhang PetscCall(PetscLogEventDeactivatePush(event)); 189*b79e0553SHong Zhang PetscCall(MatMult(A2, b, u)); 190f640565fSHong Zhang PetscCall(PetscLogEventDeactivatePop(event)); 191f640565fSHong Zhang PetscCall(VecDestroy(&u)); 192*b79e0553SHong Zhang PetscCall(MatDestroy(&A2)); 193f640565fSHong Zhang PetscFunctionReturn(PETSC_SUCCESS); 194f640565fSHong Zhang } 195f640565fSHong Zhang 19689928cc5SHong Zhang PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat) 19789928cc5SHong Zhang { 19889928cc5SHong Zhang PetscLogEvent event; 19989928cc5SHong Zhang PetscEventPerfInfo eventInfo; 20089928cc5SHong Zhang // PetscReal gpuflopRate; 20189928cc5SHong Zhang 20289928cc5SHong Zhang // if (matformat) { 20389928cc5SHong Zhang // PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event)); 20489928cc5SHong Zhang // } else { 20589928cc5SHong Zhang // PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event)); 20689928cc5SHong Zhang // } 20789928cc5SHong Zhang // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 20889928cc5SHong Zhang // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time)); 20989928cc5SHong Zhang 2103ba16761SJacob Faibussowitsch PetscFunctionBeginUser; 21189928cc5SHong Zhang PetscCall(PetscLogEventGetId("MatMult", &event)); 21289928cc5SHong Zhang PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo)); 21389928cc5SHong Zhang // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime; 21489928cc5SHong Zhang // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time)); 21589928cc5SHong Zhang if (cputime) *cputime = eventInfo.time; 21689928cc5SHong Zhang #if defined(PETSC_HAVE_DEVICE) 21789928cc5SHong Zhang if (gputime) *gputime = eventInfo.GpuTime; 21889928cc5SHong Zhang if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6; 21989928cc5SHong Zhang #endif 2203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 22189928cc5SHong Zhang } 22289928cc5SHong Zhang 22389928cc5SHong Zhang PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat) 22489928cc5SHong Zhang { 22589928cc5SHong Zhang PetscBool iscsr, issell, iscsrkokkos; 2263ba16761SJacob Faibussowitsch 2273ba16761SJacob Faibussowitsch PetscFunctionBeginUser; 22889928cc5SHong Zhang PetscCall(PetscStrcmp(matformat, "csr", &iscsr)); 22989928cc5SHong Zhang if (iscsr) { 230773bf0f6SHong Zhang if (use_gpu) { 231773bf0f6SHong Zhang #if defined(PETSC_HAVE_CUDA) 232773bf0f6SHong Zhang PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat)); 233773bf0f6SHong Zhang #endif 234773bf0f6SHong Zhang #if defined(PETSC_HAVE_HIP) 235773bf0f6SHong Zhang PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat)); 236773bf0f6SHong Zhang #endif 237773bf0f6SHong Zhang } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat)); 23889928cc5SHong Zhang } else { 23989928cc5SHong Zhang PetscCall(PetscStrcmp(matformat, "sell", &issell)); 24089928cc5SHong Zhang if (issell) { 241773bf0f6SHong Zhang if (use_gpu) { 242773bf0f6SHong Zhang #if defined(PETSC_HAVE_CUDA) 243773bf0f6SHong Zhang PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat)); 244773bf0f6SHong Zhang #endif 245773bf0f6SHong Zhang #if defined(PETSC_HAVE_HIP) 246773bf0f6SHong Zhang PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat)); 247773bf0f6SHong Zhang #endif 248773bf0f6SHong Zhang } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); 24989928cc5SHong Zhang } else { 25089928cc5SHong Zhang PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos)); 25189928cc5SHong Zhang if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat)); 25289928cc5SHong Zhang } 25389928cc5SHong Zhang } 2543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25589928cc5SHong Zhang } 25689928cc5SHong Zhang 25789928cc5SHong Zhang int main(int argc, char **args) 25889928cc5SHong Zhang { 25989928cc5SHong Zhang PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1; 26089928cc5SHong Zhang Mat A; 26189928cc5SHong Zhang Vec b; 26289928cc5SHong Zhang char jfilename[PETSC_MAX_PATH_LEN]; 26389928cc5SHong Zhang char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN]; 26489928cc5SHong Zhang char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN]; 26589928cc5SHong Zhang char *matformats[5]; 26689928cc5SHong Zhang char **filenames = NULL, **groupnames = NULL, **matnames = NULL; 267f06bc391SHong Zhang char ordering[256] = MATORDERINGRCM; 268f06bc391SHong Zhang PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE; 269f06bc391SHong Zhang IS rowperm = NULL, colperm = NULL; 27089928cc5SHong Zhang PetscViewer fd; 27189928cc5SHong Zhang PetscReal starting_spmv_time = 0, *spmv_times; 27289928cc5SHong Zhang 27389928cc5SHong Zhang PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null")); 27489928cc5SHong Zhang PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 27589928cc5SHong Zhang PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1)); 27689928cc5SHong Zhang if (!flg1) { 27789928cc5SHong Zhang nformats = 1; 27889928cc5SHong Zhang PetscCall(PetscStrallocpy("csr", &matformats[0])); 27989928cc5SHong Zhang } 28089928cc5SHong Zhang PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL)); 28189928cc5SHong Zhang PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL)); 28289928cc5SHong Zhang /* Read matrix and RHS */ 28389928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL)); 28489928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL)); 28589928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1)); 28689928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2)); 28789928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3)); 288f06bc391SHong Zhang PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", ""); 289f06bc391SHong Zhang PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute)); 290f06bc391SHong Zhang PetscOptionsEnd(); 29189928cc5SHong Zhang #if !defined(PETSC_HAVE_DEVICE) 29289928cc5SHong Zhang PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support"); 29389928cc5SHong Zhang #endif 29489928cc5SHong Zhang PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format"); 29589928cc5SHong Zhang if (flg3) { 29689928cc5SHong Zhang ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat); 29789928cc5SHong Zhang PetscCall(PetscCalloc1(nmat, &spmv_times)); 29889928cc5SHong Zhang } else if (flg2) { 29989928cc5SHong Zhang PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE)); 30089928cc5SHong Zhang } else if (flg1) { 30189928cc5SHong Zhang PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd)); 30289928cc5SHong Zhang PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 30389928cc5SHong Zhang PetscCall(MatSetType(A, MATAIJ)); 30489928cc5SHong Zhang PetscCall(MatSetFromOptions(A)); 30589928cc5SHong Zhang PetscCall(MatLoad(A, fd)); 30689928cc5SHong Zhang PetscCall(PetscViewerDestroy(&fd)); 30789928cc5SHong Zhang } 308f06bc391SHong Zhang if (permute) { 309f06bc391SHong Zhang Mat Aperm; 310f06bc391SHong Zhang PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm)); 311f06bc391SHong Zhang PetscCall(MatPermute(A, rowperm, colperm, &Aperm)); 312f06bc391SHong Zhang PetscCall(MatDestroy(&A)); 313f06bc391SHong Zhang A = Aperm; /* Replace original operator with permuted version */ 314f06bc391SHong Zhang } 31589928cc5SHong Zhang /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */ 31689928cc5SHong Zhang PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg)); 31789928cc5SHong Zhang if (bflg) { 31889928cc5SHong Zhang PetscViewer fb; 31989928cc5SHong Zhang PetscCall(VecCreate(PETSC_COMM_WORLD, &b)); 32089928cc5SHong Zhang PetscCall(VecSetFromOptions(b)); 32189928cc5SHong Zhang PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb)); 32289928cc5SHong Zhang PetscCall(VecLoad(b, fb)); 32389928cc5SHong Zhang PetscCall(PetscViewerDestroy(&fb)); 32489928cc5SHong Zhang } 32589928cc5SHong Zhang 32689928cc5SHong Zhang for (j = 0; j < nformats; j++) { 32789928cc5SHong Zhang char *petscmatformat = NULL; 3283ba16761SJacob Faibussowitsch PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat)); 32989928cc5SHong Zhang PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]); 33089928cc5SHong Zhang if (flg3) { // mat names specified in a JSON file 33189928cc5SHong Zhang for (i = 0; i < nmat; i++) { 33289928cc5SHong Zhang PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE)); 33389928cc5SHong Zhang if (!bflg) { 33489928cc5SHong Zhang PetscCall(MatCreateVecs(A, &b, NULL)); 33589928cc5SHong Zhang PetscCall(VecSet(b, 1.0)); 33689928cc5SHong Zhang } 337f640565fSHong Zhang if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 33889928cc5SHong Zhang PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions)); 33989928cc5SHong Zhang if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat)); 34089928cc5SHong Zhang else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat)); 34189928cc5SHong Zhang PetscCall(MatDestroy(&A)); 34289928cc5SHong Zhang if (!bflg) PetscCall(VecDestroy(&b)); 34389928cc5SHong Zhang } 34489928cc5SHong Zhang UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions); 34589928cc5SHong Zhang starting_spmv_time = spmv_times[nmat - 1]; 34689928cc5SHong Zhang } else { 34789928cc5SHong Zhang PetscReal spmv_time; 34889928cc5SHong Zhang if (!bflg) { 34989928cc5SHong Zhang PetscCall(MatCreateVecs(A, &b, NULL)); 35089928cc5SHong Zhang PetscCall(VecSet(b, 1.0)); 35189928cc5SHong Zhang } 352f640565fSHong Zhang if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat)); 35389928cc5SHong Zhang PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions)); 35489928cc5SHong Zhang if (!bflg) PetscCall(VecDestroy(&b)); 35589928cc5SHong Zhang } 35689928cc5SHong Zhang PetscCall(PetscFree(petscmatformat)); 35789928cc5SHong Zhang } 35889928cc5SHong Zhang if (flg3) { 35989928cc5SHong Zhang for (i = 0; i < nmat; i++) { 36089928cc5SHong Zhang free(filenames[i]); 36189928cc5SHong Zhang free(groupnames[i]); 36289928cc5SHong Zhang free(matnames[i]); 36389928cc5SHong Zhang } 36489928cc5SHong Zhang free(filenames); 36589928cc5SHong Zhang free(groupnames); 36689928cc5SHong Zhang free(matnames); 36789928cc5SHong Zhang PetscCall(PetscFree(spmv_times)); 36889928cc5SHong Zhang } 36989928cc5SHong Zhang for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j])); 37089928cc5SHong Zhang if (flg1 || flg2) PetscCall(MatDestroy(&A)); 37189928cc5SHong Zhang if (bflg) PetscCall(VecDestroy(&b)); 372f06bc391SHong Zhang PetscCall(ISDestroy(&rowperm)); 373f06bc391SHong Zhang PetscCall(ISDestroy(&colperm)); 37489928cc5SHong Zhang PetscCall(PetscFinalize()); 37589928cc5SHong Zhang return 0; 37689928cc5SHong Zhang } 37789928cc5SHong Zhang /*TEST 37889928cc5SHong Zhang 37989928cc5SHong Zhang build: 38089928cc5SHong Zhang requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES) 38189928cc5SHong Zhang depends: mmloader.c mmio.c cJSON.c 38289928cc5SHong Zhang 38389928cc5SHong Zhang test: 38489928cc5SHong Zhang suffix: 1 38589928cc5SHong Zhang args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx 38689928cc5SHong Zhang 38789928cc5SHong Zhang test: 38889928cc5SHong Zhang suffix: 2 38989928cc5SHong Zhang args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 39089928cc5SHong Zhang output_file: output/bench_spmv_1.out 39189928cc5SHong Zhang requires: cuda 39289928cc5SHong Zhang 393773bf0f6SHong Zhang test: 394773bf0f6SHong Zhang suffix: 3 395773bf0f6SHong Zhang args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu 396773bf0f6SHong Zhang output_file: output/bench_spmv_1.out 397773bf0f6SHong Zhang requires: hip 398773bf0f6SHong Zhang 39989928cc5SHong Zhang TEST*/ 400