xref: /petsc/src/mat/tests/bench_spmv.c (revision aaa8cc7d2a5c3913edcbb923e20f154fe9c4aa65)
189928cc5SHong Zhang static char help[] = "Driver for benchmarking SpMV.";
289928cc5SHong Zhang 
389928cc5SHong Zhang #include <petscmat.h>
489928cc5SHong Zhang #include "cJSON.h"
589928cc5SHong Zhang #include "mmloader.h"
689928cc5SHong Zhang 
789928cc5SHong Zhang char *read_file(const char *filename)
889928cc5SHong Zhang {
989928cc5SHong Zhang   FILE  *file       = NULL;
1089928cc5SHong Zhang   long   length     = 0;
1189928cc5SHong Zhang   char  *content    = NULL;
1289928cc5SHong Zhang   size_t read_chars = 0;
1389928cc5SHong Zhang 
1489928cc5SHong Zhang   /* open in read binary mode */
1589928cc5SHong Zhang   file = fopen(filename, "rb");
1689928cc5SHong Zhang   if (file) {
1789928cc5SHong Zhang     /* get the length */
1889928cc5SHong Zhang     fseek(file, 0, SEEK_END);
1989928cc5SHong Zhang     length = ftell(file);
2089928cc5SHong Zhang     fseek(file, 0, SEEK_SET);
2189928cc5SHong Zhang     /* allocate content buffer */
2289928cc5SHong Zhang     content = (char *)malloc((size_t)length + sizeof(""));
2389928cc5SHong Zhang     /* read the file into memory */
2489928cc5SHong Zhang     read_chars          = fread(content, sizeof(char), (size_t)length, file);
2589928cc5SHong Zhang     content[read_chars] = '\0';
2689928cc5SHong Zhang     fclose(file);
2789928cc5SHong Zhang   }
2889928cc5SHong Zhang   return content;
2989928cc5SHong Zhang }
3089928cc5SHong Zhang 
3189928cc5SHong Zhang void write_file(const char *filename, const char *content)
3289928cc5SHong Zhang {
3389928cc5SHong Zhang   FILE *file = NULL;
3489928cc5SHong Zhang   file       = fopen(filename, "w");
3589928cc5SHong Zhang   if (file) { fputs(content, file); }
3689928cc5SHong Zhang   fclose(file);
3789928cc5SHong Zhang }
3889928cc5SHong Zhang 
3989928cc5SHong Zhang int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
4089928cc5SHong Zhang {
4189928cc5SHong Zhang   char        *content     = read_file(inputjsonfile);
4289928cc5SHong Zhang   cJSON       *matrix_json = NULL;
4389928cc5SHong Zhang   const cJSON *problem = NULL, *elem = NULL;
4489928cc5SHong Zhang   const cJSON *item = NULL;
4589928cc5SHong Zhang   char       **filenames, **groupnames, **matnames;
4689928cc5SHong Zhang   int          i, n;
4789928cc5SHong Zhang   if (!content) return 0;
4889928cc5SHong Zhang   matrix_json = cJSON_Parse(content);
4989928cc5SHong Zhang   if (!matrix_json) return 0;
5089928cc5SHong Zhang   n          = cJSON_GetArraySize(matrix_json);
5189928cc5SHong Zhang   *nmat      = n;
5289928cc5SHong Zhang   filenames  = (char **)malloc(sizeof(char *) * n);
5389928cc5SHong Zhang   groupnames = (char **)malloc(sizeof(char *) * n);
5489928cc5SHong Zhang   matnames   = (char **)malloc(sizeof(char *) * n);
5589928cc5SHong Zhang   for (i = 0; i < n; i++) {
5689928cc5SHong Zhang     elem         = cJSON_GetArrayItem(matrix_json, i);
5789928cc5SHong Zhang     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
5889928cc5SHong Zhang     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
5989928cc5SHong Zhang     strcpy(filenames[i], item->valuestring);
6089928cc5SHong Zhang     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
6189928cc5SHong Zhang     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
6289928cc5SHong Zhang     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
6389928cc5SHong Zhang     strcpy(groupnames[i], item->valuestring);
6489928cc5SHong Zhang     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
6589928cc5SHong Zhang     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
6689928cc5SHong Zhang     strcpy(matnames[i], item->valuestring);
6789928cc5SHong Zhang   }
6889928cc5SHong Zhang   cJSON_Delete(matrix_json);
6989928cc5SHong Zhang   free(content);
7089928cc5SHong Zhang   *outputfilenames  = filenames;
7189928cc5SHong Zhang   *outputgroupnames = groupnames;
7289928cc5SHong Zhang   *outputmatnames   = matnames;
7389928cc5SHong Zhang   return 0;
7489928cc5SHong Zhang }
7589928cc5SHong Zhang 
7689928cc5SHong Zhang int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
7789928cc5SHong Zhang {
7889928cc5SHong Zhang   char  *content     = read_file(inputjsonfile);
7989928cc5SHong Zhang   cJSON *matrix_json = NULL;
8089928cc5SHong Zhang   cJSON *elem        = NULL;
8189928cc5SHong Zhang   int    i, n;
8289928cc5SHong Zhang   if (!content) return 0;
8389928cc5SHong Zhang   matrix_json = cJSON_Parse(content);
8489928cc5SHong Zhang   if (!matrix_json) return 0;
8589928cc5SHong Zhang   n = cJSON_GetArraySize(matrix_json);
8689928cc5SHong Zhang   for (i = 0; i < n; i++) {
8789928cc5SHong Zhang     cJSON *spmv   = NULL;
8889928cc5SHong Zhang     cJSON *format = NULL;
8989928cc5SHong Zhang     elem          = cJSON_GetArrayItem(matrix_json, i);
9089928cc5SHong Zhang     spmv          = cJSON_GetObjectItem(elem, "spmv");
9189928cc5SHong Zhang     if (spmv) {
9289928cc5SHong Zhang       format = cJSON_GetObjectItem(spmv, matformat);
9389928cc5SHong Zhang       if (format) {
9489928cc5SHong Zhang         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
9589928cc5SHong Zhang         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
9689928cc5SHong Zhang       } else {
9789928cc5SHong Zhang         format = cJSON_CreateObject();
9889928cc5SHong Zhang         cJSON_AddItemToObject(spmv, matformat, format);
9989928cc5SHong Zhang         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
10089928cc5SHong Zhang         cJSON_AddNumberToObject(format, "repetitions", repetitions);
10189928cc5SHong Zhang       }
10289928cc5SHong Zhang     } else {
10389928cc5SHong Zhang       spmv = cJSON_CreateObject();
10489928cc5SHong Zhang       cJSON_AddItemToObject(elem, "spmv", spmv);
10589928cc5SHong Zhang       format = cJSON_CreateObject();
10689928cc5SHong Zhang       cJSON_AddItemToObject(spmv, matformat, format);
10789928cc5SHong Zhang       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
10889928cc5SHong Zhang       cJSON_AddNumberToObject(format, "repetitions", repetitions);
10989928cc5SHong Zhang     }
11089928cc5SHong Zhang   }
11189928cc5SHong Zhang   free(content);
11289928cc5SHong Zhang   content = cJSON_Print(matrix_json);
11389928cc5SHong Zhang   write_file(inputjsonfile, content);
11489928cc5SHong Zhang   cJSON_Delete(matrix_json);
11589928cc5SHong Zhang   free(content);
11689928cc5SHong Zhang   return 0;
11789928cc5SHong Zhang }
11889928cc5SHong Zhang 
11989928cc5SHong Zhang /*
12089928cc5SHong Zhang   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121*aaa8cc7dSPierre Jolivet   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
12289928cc5SHong Zhang   in each repetition. As a result,  each MatMult call is fresh, and we can capture
12389928cc5SHong Zhang   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
12489928cc5SHong Zhang   during consecutive calls.
12589928cc5SHong Zhang */
12689928cc5SHong Zhang PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
12789928cc5SHong Zhang {
12889928cc5SHong Zhang   Mat            A2 = NULL;
12989928cc5SHong Zhang   PetscInt       i;
13089928cc5SHong Zhang   Vec            u;
13189928cc5SHong Zhang   PetscLogDouble vstart = 0, vend = 0;
13289928cc5SHong Zhang   PetscBool      isaijcusparse, isaijkokkos;
13389928cc5SHong Zhang 
1343ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
13589928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
13689928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
13789928cc5SHong Zhang   if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
13889928cc5SHong Zhang   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
13989928cc5SHong Zhang   PetscCall(VecDuplicate(b, &u));
14089928cc5SHong Zhang   if (time) *time = 0.0;
14189928cc5SHong Zhang   for (i = 0; i < repetitions; i++) {
14289928cc5SHong Zhang     if (use_gpu) {
14389928cc5SHong Zhang       PetscCall(MatDestroy(&A2));
14489928cc5SHong Zhang       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
14589928cc5SHong Zhang       PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
14689928cc5SHong Zhang     } else A2 = A;
14789928cc5SHong Zhang     /* Timing MatMult */
14889928cc5SHong Zhang     if (time) PetscCall(PetscTime(&vstart));
14989928cc5SHong Zhang 
15089928cc5SHong Zhang     PetscCall(MatMult(A2, b, u));
15189928cc5SHong Zhang 
15289928cc5SHong Zhang     if (time) {
15389928cc5SHong Zhang       PetscCall(PetscTime(&vend));
15489928cc5SHong Zhang       *time += (PetscReal)(vend - vstart);
15589928cc5SHong Zhang     }
15689928cc5SHong Zhang   }
15789928cc5SHong Zhang   PetscCall(VecDestroy(&u));
15889928cc5SHong Zhang   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
1593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16089928cc5SHong Zhang }
16189928cc5SHong Zhang 
16289928cc5SHong Zhang PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
16389928cc5SHong Zhang {
16489928cc5SHong Zhang   PetscLogEvent      event;
16589928cc5SHong Zhang   PetscEventPerfInfo eventInfo;
16689928cc5SHong Zhang   //PetscReal          gpuflopRate;
16789928cc5SHong Zhang 
16889928cc5SHong Zhang   // if (matformat) {
16989928cc5SHong Zhang   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
17089928cc5SHong Zhang   // } else {
17189928cc5SHong Zhang   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
17289928cc5SHong Zhang   // }
17389928cc5SHong Zhang   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
17489928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
17589928cc5SHong Zhang 
1763ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
17789928cc5SHong Zhang   PetscCall(PetscLogEventGetId("MatMult", &event));
17889928cc5SHong Zhang   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
17989928cc5SHong Zhang   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
18089928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
18189928cc5SHong Zhang   if (cputime) *cputime = eventInfo.time;
18289928cc5SHong Zhang #if defined(PETSC_HAVE_DEVICE)
18389928cc5SHong Zhang   if (gputime) *gputime = eventInfo.GpuTime;
18489928cc5SHong Zhang   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
18589928cc5SHong Zhang #endif
1863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18789928cc5SHong Zhang }
18889928cc5SHong Zhang 
18989928cc5SHong Zhang PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
19089928cc5SHong Zhang {
19189928cc5SHong Zhang   PetscBool iscsr, issell, iscsrkokkos;
1923ba16761SJacob Faibussowitsch 
1933ba16761SJacob Faibussowitsch   PetscFunctionBeginUser;
19489928cc5SHong Zhang   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
19589928cc5SHong Zhang   if (iscsr) {
19689928cc5SHong Zhang     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
19789928cc5SHong Zhang     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
19889928cc5SHong Zhang   } else {
19989928cc5SHong Zhang     PetscCall(PetscStrcmp(matformat, "sell", &issell));
20089928cc5SHong Zhang     if (issell) {
20189928cc5SHong Zhang       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
20289928cc5SHong Zhang       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
20389928cc5SHong Zhang     } else {
20489928cc5SHong Zhang       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
20589928cc5SHong Zhang       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
20689928cc5SHong Zhang     }
20789928cc5SHong Zhang   }
2083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20989928cc5SHong Zhang }
21089928cc5SHong Zhang 
21189928cc5SHong Zhang int main(int argc, char **args)
21289928cc5SHong Zhang {
21389928cc5SHong Zhang   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
21489928cc5SHong Zhang   Mat         A;
21589928cc5SHong Zhang   Vec         b;
21689928cc5SHong Zhang   char        jfilename[PETSC_MAX_PATH_LEN];
21789928cc5SHong Zhang   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
21889928cc5SHong Zhang   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
21989928cc5SHong Zhang   char       *matformats[5];
22089928cc5SHong Zhang   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
221f06bc391SHong Zhang   char        ordering[256] = MATORDERINGRCM;
222f06bc391SHong Zhang   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
223f06bc391SHong Zhang   IS          rowperm = NULL, colperm = NULL;
22489928cc5SHong Zhang   PetscViewer fd;
22589928cc5SHong Zhang   PetscReal   starting_spmv_time = 0, *spmv_times;
22689928cc5SHong Zhang 
22789928cc5SHong Zhang   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
22889928cc5SHong Zhang   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
22989928cc5SHong Zhang   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
23089928cc5SHong Zhang   if (!flg1) {
23189928cc5SHong Zhang     nformats = 1;
23289928cc5SHong Zhang     PetscCall(PetscStrallocpy("csr", &matformats[0]));
23389928cc5SHong Zhang   }
23489928cc5SHong Zhang   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
23589928cc5SHong Zhang   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
23689928cc5SHong Zhang   /* Read matrix and RHS */
23789928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
23889928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
23989928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
24089928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
24189928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
242f06bc391SHong Zhang   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
243f06bc391SHong Zhang   PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
244f06bc391SHong Zhang   PetscOptionsEnd();
24589928cc5SHong Zhang #if !defined(PETSC_HAVE_DEVICE)
24689928cc5SHong Zhang   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
24789928cc5SHong Zhang #endif
24889928cc5SHong Zhang   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
24989928cc5SHong Zhang   if (flg3) {
25089928cc5SHong Zhang     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
25189928cc5SHong Zhang     PetscCall(PetscCalloc1(nmat, &spmv_times));
25289928cc5SHong Zhang   } else if (flg2) {
25389928cc5SHong Zhang     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
25489928cc5SHong Zhang   } else if (flg1) {
25589928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
25689928cc5SHong Zhang     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
25789928cc5SHong Zhang     PetscCall(MatSetType(A, MATAIJ));
25889928cc5SHong Zhang     PetscCall(MatSetFromOptions(A));
25989928cc5SHong Zhang     PetscCall(MatLoad(A, fd));
26089928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fd));
26189928cc5SHong Zhang   }
262f06bc391SHong Zhang   if (permute) {
263f06bc391SHong Zhang     Mat Aperm;
264f06bc391SHong Zhang     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
265f06bc391SHong Zhang     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
266f06bc391SHong Zhang     PetscCall(MatDestroy(&A));
267f06bc391SHong Zhang     A = Aperm; /* Replace original operator with permuted version */
268f06bc391SHong Zhang   }
26989928cc5SHong Zhang   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
27089928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
27189928cc5SHong Zhang   if (bflg) {
27289928cc5SHong Zhang     PetscViewer fb;
27389928cc5SHong Zhang     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
27489928cc5SHong Zhang     PetscCall(VecSetFromOptions(b));
27589928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
27689928cc5SHong Zhang     PetscCall(VecLoad(b, fb));
27789928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fb));
27889928cc5SHong Zhang   }
27989928cc5SHong Zhang 
28089928cc5SHong Zhang   for (j = 0; j < nformats; j++) {
28189928cc5SHong Zhang     char *petscmatformat = NULL;
2823ba16761SJacob Faibussowitsch     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
28389928cc5SHong Zhang     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
28489928cc5SHong Zhang     if (flg3) { // mat names specified in a JSON file
28589928cc5SHong Zhang       for (i = 0; i < nmat; i++) {
28689928cc5SHong Zhang         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
28789928cc5SHong Zhang         if (!bflg) {
28889928cc5SHong Zhang           PetscCall(MatCreateVecs(A, &b, NULL));
28989928cc5SHong Zhang           PetscCall(VecSet(b, 1.0));
29089928cc5SHong Zhang         }
29189928cc5SHong Zhang         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
29289928cc5SHong Zhang         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
29389928cc5SHong Zhang         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
29489928cc5SHong Zhang         PetscCall(MatDestroy(&A));
29589928cc5SHong Zhang         if (!bflg) PetscCall(VecDestroy(&b));
29689928cc5SHong Zhang       }
29789928cc5SHong Zhang       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
29889928cc5SHong Zhang       starting_spmv_time = spmv_times[nmat - 1];
29989928cc5SHong Zhang     } else {
30089928cc5SHong Zhang       PetscReal spmv_time;
30189928cc5SHong Zhang       if (!bflg) {
30289928cc5SHong Zhang         PetscCall(MatCreateVecs(A, &b, NULL));
30389928cc5SHong Zhang         PetscCall(VecSet(b, 1.0));
30489928cc5SHong Zhang       }
30589928cc5SHong Zhang       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
30689928cc5SHong Zhang       if (!bflg) PetscCall(VecDestroy(&b));
30789928cc5SHong Zhang     }
30889928cc5SHong Zhang     PetscCall(PetscFree(petscmatformat));
30989928cc5SHong Zhang   }
31089928cc5SHong Zhang   if (flg3) {
31189928cc5SHong Zhang     for (i = 0; i < nmat; i++) {
31289928cc5SHong Zhang       free(filenames[i]);
31389928cc5SHong Zhang       free(groupnames[i]);
31489928cc5SHong Zhang       free(matnames[i]);
31589928cc5SHong Zhang     }
31689928cc5SHong Zhang     free(filenames);
31789928cc5SHong Zhang     free(groupnames);
31889928cc5SHong Zhang     free(matnames);
31989928cc5SHong Zhang     PetscCall(PetscFree(spmv_times));
32089928cc5SHong Zhang   }
32189928cc5SHong Zhang   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
32289928cc5SHong Zhang   if (flg1 || flg2) PetscCall(MatDestroy(&A));
32389928cc5SHong Zhang   if (bflg) PetscCall(VecDestroy(&b));
324f06bc391SHong Zhang   PetscCall(ISDestroy(&rowperm));
325f06bc391SHong Zhang   PetscCall(ISDestroy(&colperm));
32689928cc5SHong Zhang   PetscCall(PetscFinalize());
32789928cc5SHong Zhang   return 0;
32889928cc5SHong Zhang }
32989928cc5SHong Zhang /*TEST
33089928cc5SHong Zhang 
33189928cc5SHong Zhang    build:
33289928cc5SHong Zhang       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
33389928cc5SHong Zhang       depends: mmloader.c mmio.c cJSON.c
33489928cc5SHong Zhang 
33589928cc5SHong Zhang    test:
33689928cc5SHong Zhang       suffix: 1
33789928cc5SHong Zhang       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
33889928cc5SHong Zhang 
33989928cc5SHong Zhang    test:
34089928cc5SHong Zhang       suffix: 2
34189928cc5SHong Zhang       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
34289928cc5SHong Zhang       output_file: output/bench_spmv_1.out
34389928cc5SHong Zhang       requires: cuda
34489928cc5SHong Zhang 
34589928cc5SHong Zhang TEST*/
346