xref: /petsc/src/mat/tests/bench_spmv.c (revision 89928cc5142e867cb7b0dd1ff74e0acffcd6b4b5)
1*89928cc5SHong Zhang static char help[] = "Driver for benchmarking SpMV.";
2*89928cc5SHong Zhang 
3*89928cc5SHong Zhang #include <petscmat.h>
4*89928cc5SHong Zhang #include "cJSON.h"
5*89928cc5SHong Zhang #include "mmloader.h"
6*89928cc5SHong Zhang 
7*89928cc5SHong Zhang char *read_file(const char *filename)
8*89928cc5SHong Zhang {
9*89928cc5SHong Zhang   FILE  *file       = NULL;
10*89928cc5SHong Zhang   long   length     = 0;
11*89928cc5SHong Zhang   char  *content    = NULL;
12*89928cc5SHong Zhang   size_t read_chars = 0;
13*89928cc5SHong Zhang 
14*89928cc5SHong Zhang   /* open in read binary mode */
15*89928cc5SHong Zhang   file = fopen(filename, "rb");
16*89928cc5SHong Zhang   if (file) {
17*89928cc5SHong Zhang     /* get the length */
18*89928cc5SHong Zhang     fseek(file, 0, SEEK_END);
19*89928cc5SHong Zhang     length = ftell(file);
20*89928cc5SHong Zhang     fseek(file, 0, SEEK_SET);
21*89928cc5SHong Zhang     /* allocate content buffer */
22*89928cc5SHong Zhang     content = (char *)malloc((size_t)length + sizeof(""));
23*89928cc5SHong Zhang     /* read the file into memory */
24*89928cc5SHong Zhang     read_chars          = fread(content, sizeof(char), (size_t)length, file);
25*89928cc5SHong Zhang     content[read_chars] = '\0';
26*89928cc5SHong Zhang     fclose(file);
27*89928cc5SHong Zhang   }
28*89928cc5SHong Zhang   return content;
29*89928cc5SHong Zhang }
30*89928cc5SHong Zhang 
31*89928cc5SHong Zhang void write_file(const char *filename, const char *content)
32*89928cc5SHong Zhang {
33*89928cc5SHong Zhang   FILE *file = NULL;
34*89928cc5SHong Zhang   file       = fopen(filename, "w");
35*89928cc5SHong Zhang   if (file) { fputs(content, file); }
36*89928cc5SHong Zhang   fclose(file);
37*89928cc5SHong Zhang }
38*89928cc5SHong Zhang 
39*89928cc5SHong Zhang int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40*89928cc5SHong Zhang {
41*89928cc5SHong Zhang   char        *content     = read_file(inputjsonfile);
42*89928cc5SHong Zhang   cJSON       *matrix_json = NULL;
43*89928cc5SHong Zhang   const cJSON *problem = NULL, *elem = NULL;
44*89928cc5SHong Zhang   const cJSON *item = NULL;
45*89928cc5SHong Zhang   char       **filenames, **groupnames, **matnames;
46*89928cc5SHong Zhang   int          i, n;
47*89928cc5SHong Zhang   if (!content) return 0;
48*89928cc5SHong Zhang   matrix_json = cJSON_Parse(content);
49*89928cc5SHong Zhang   if (!matrix_json) return 0;
50*89928cc5SHong Zhang   n          = cJSON_GetArraySize(matrix_json);
51*89928cc5SHong Zhang   *nmat      = n;
52*89928cc5SHong Zhang   filenames  = (char **)malloc(sizeof(char *) * n);
53*89928cc5SHong Zhang   groupnames = (char **)malloc(sizeof(char *) * n);
54*89928cc5SHong Zhang   matnames   = (char **)malloc(sizeof(char *) * n);
55*89928cc5SHong Zhang   for (i = 0; i < n; i++) {
56*89928cc5SHong Zhang     elem         = cJSON_GetArrayItem(matrix_json, i);
57*89928cc5SHong Zhang     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58*89928cc5SHong Zhang     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59*89928cc5SHong Zhang     strcpy(filenames[i], item->valuestring);
60*89928cc5SHong Zhang     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61*89928cc5SHong Zhang     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62*89928cc5SHong Zhang     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63*89928cc5SHong Zhang     strcpy(groupnames[i], item->valuestring);
64*89928cc5SHong Zhang     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65*89928cc5SHong Zhang     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66*89928cc5SHong Zhang     strcpy(matnames[i], item->valuestring);
67*89928cc5SHong Zhang   }
68*89928cc5SHong Zhang   cJSON_Delete(matrix_json);
69*89928cc5SHong Zhang   free(content);
70*89928cc5SHong Zhang   *outputfilenames  = filenames;
71*89928cc5SHong Zhang   *outputgroupnames = groupnames;
72*89928cc5SHong Zhang   *outputmatnames   = matnames;
73*89928cc5SHong Zhang   return 0;
74*89928cc5SHong Zhang }
75*89928cc5SHong Zhang 
76*89928cc5SHong Zhang int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77*89928cc5SHong Zhang {
78*89928cc5SHong Zhang   char  *content     = read_file(inputjsonfile);
79*89928cc5SHong Zhang   cJSON *matrix_json = NULL;
80*89928cc5SHong Zhang   cJSON *elem        = NULL;
81*89928cc5SHong Zhang   int    i, n;
82*89928cc5SHong Zhang   if (!content) return 0;
83*89928cc5SHong Zhang   matrix_json = cJSON_Parse(content);
84*89928cc5SHong Zhang   if (!matrix_json) return 0;
85*89928cc5SHong Zhang   n = cJSON_GetArraySize(matrix_json);
86*89928cc5SHong Zhang   for (i = 0; i < n; i++) {
87*89928cc5SHong Zhang     cJSON *spmv   = NULL;
88*89928cc5SHong Zhang     cJSON *format = NULL;
89*89928cc5SHong Zhang     elem          = cJSON_GetArrayItem(matrix_json, i);
90*89928cc5SHong Zhang     spmv          = cJSON_GetObjectItem(elem, "spmv");
91*89928cc5SHong Zhang     if (spmv) {
92*89928cc5SHong Zhang       format = cJSON_GetObjectItem(spmv, matformat);
93*89928cc5SHong Zhang       if (format) {
94*89928cc5SHong Zhang         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95*89928cc5SHong Zhang         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96*89928cc5SHong Zhang       } else {
97*89928cc5SHong Zhang         format = cJSON_CreateObject();
98*89928cc5SHong Zhang         cJSON_AddItemToObject(spmv, matformat, format);
99*89928cc5SHong Zhang         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100*89928cc5SHong Zhang         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101*89928cc5SHong Zhang       }
102*89928cc5SHong Zhang     } else {
103*89928cc5SHong Zhang       spmv = cJSON_CreateObject();
104*89928cc5SHong Zhang       cJSON_AddItemToObject(elem, "spmv", spmv);
105*89928cc5SHong Zhang       format = cJSON_CreateObject();
106*89928cc5SHong Zhang       cJSON_AddItemToObject(spmv, matformat, format);
107*89928cc5SHong Zhang       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108*89928cc5SHong Zhang       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109*89928cc5SHong Zhang     }
110*89928cc5SHong Zhang   }
111*89928cc5SHong Zhang   free(content);
112*89928cc5SHong Zhang   content = cJSON_Print(matrix_json);
113*89928cc5SHong Zhang   write_file(inputjsonfile, content);
114*89928cc5SHong Zhang   cJSON_Delete(matrix_json);
115*89928cc5SHong Zhang   free(content);
116*89928cc5SHong Zhang   return 0;
117*89928cc5SHong Zhang }
118*89928cc5SHong Zhang 
119*89928cc5SHong Zhang /*
120*89928cc5SHong Zhang   For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
121*89928cc5SHong Zhang   The extra CPU copy allows us to destroy the GPU matrix and recreate it effciently
122*89928cc5SHong Zhang   in each repetition. As a result,  each MatMult call is fresh, and we can capture
123*89928cc5SHong Zhang   the first-time overhead (e.g. of CuSparse SpMV), and avoids the cache effect
124*89928cc5SHong Zhang   during consecutive calls.
125*89928cc5SHong Zhang */
126*89928cc5SHong Zhang PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127*89928cc5SHong Zhang {
128*89928cc5SHong Zhang   Mat            A2 = NULL;
129*89928cc5SHong Zhang   PetscInt       i;
130*89928cc5SHong Zhang   Vec            u;
131*89928cc5SHong Zhang   PetscLogDouble vstart = 0, vend = 0;
132*89928cc5SHong Zhang   PetscBool      isaijcusparse, isaijkokkos;
133*89928cc5SHong Zhang 
134*89928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
135*89928cc5SHong Zhang   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
136*89928cc5SHong Zhang   if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
137*89928cc5SHong Zhang   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
138*89928cc5SHong Zhang   PetscCall(VecDuplicate(b, &u));
139*89928cc5SHong Zhang   if (time) *time = 0.0;
140*89928cc5SHong Zhang   for (i = 0; i < repetitions; i++) {
141*89928cc5SHong Zhang     if (use_gpu) {
142*89928cc5SHong Zhang       PetscCall(MatDestroy(&A2));
143*89928cc5SHong Zhang       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
144*89928cc5SHong Zhang       PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
145*89928cc5SHong Zhang     } else A2 = A;
146*89928cc5SHong Zhang     /* Timing MatMult */
147*89928cc5SHong Zhang     if (time) PetscCall(PetscTime(&vstart));
148*89928cc5SHong Zhang 
149*89928cc5SHong Zhang     PetscCall(MatMult(A2, b, u));
150*89928cc5SHong Zhang 
151*89928cc5SHong Zhang     if (time) {
152*89928cc5SHong Zhang       PetscCall(PetscTime(&vend));
153*89928cc5SHong Zhang       *time += (PetscReal)(vend - vstart);
154*89928cc5SHong Zhang     }
155*89928cc5SHong Zhang   }
156*89928cc5SHong Zhang   PetscCall(VecDestroy(&u));
157*89928cc5SHong Zhang   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
158*89928cc5SHong Zhang   return 0;
159*89928cc5SHong Zhang }
160*89928cc5SHong Zhang 
161*89928cc5SHong Zhang PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
162*89928cc5SHong Zhang {
163*89928cc5SHong Zhang   PetscLogEvent      event;
164*89928cc5SHong Zhang   PetscEventPerfInfo eventInfo;
165*89928cc5SHong Zhang   //PetscReal          gpuflopRate;
166*89928cc5SHong Zhang 
167*89928cc5SHong Zhang   // if (matformat) {
168*89928cc5SHong Zhang   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
169*89928cc5SHong Zhang   // } else {
170*89928cc5SHong Zhang   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
171*89928cc5SHong Zhang   // }
172*89928cc5SHong Zhang   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
173*89928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
174*89928cc5SHong Zhang 
175*89928cc5SHong Zhang   PetscCall(PetscLogEventGetId("MatMult", &event));
176*89928cc5SHong Zhang   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
177*89928cc5SHong Zhang   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
178*89928cc5SHong Zhang   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
179*89928cc5SHong Zhang   if (cputime) *cputime = eventInfo.time;
180*89928cc5SHong Zhang #if defined(PETSC_HAVE_DEVICE)
181*89928cc5SHong Zhang   if (gputime) *gputime = eventInfo.GpuTime;
182*89928cc5SHong Zhang   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
183*89928cc5SHong Zhang #endif
184*89928cc5SHong Zhang   return 0;
185*89928cc5SHong Zhang }
186*89928cc5SHong Zhang 
187*89928cc5SHong Zhang PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
188*89928cc5SHong Zhang {
189*89928cc5SHong Zhang   PetscBool iscsr, issell, iscsrkokkos;
190*89928cc5SHong Zhang   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
191*89928cc5SHong Zhang   if (iscsr) {
192*89928cc5SHong Zhang     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
193*89928cc5SHong Zhang     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
194*89928cc5SHong Zhang   } else {
195*89928cc5SHong Zhang     PetscCall(PetscStrcmp(matformat, "sell", &issell));
196*89928cc5SHong Zhang     if (issell) {
197*89928cc5SHong Zhang       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
198*89928cc5SHong Zhang       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
199*89928cc5SHong Zhang     } else {
200*89928cc5SHong Zhang       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
201*89928cc5SHong Zhang       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
202*89928cc5SHong Zhang     }
203*89928cc5SHong Zhang   }
204*89928cc5SHong Zhang   return 0;
205*89928cc5SHong Zhang }
206*89928cc5SHong Zhang 
207*89928cc5SHong Zhang int main(int argc, char **args)
208*89928cc5SHong Zhang {
209*89928cc5SHong Zhang   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
210*89928cc5SHong Zhang   Mat         A;
211*89928cc5SHong Zhang   Vec         b;
212*89928cc5SHong Zhang   char        jfilename[PETSC_MAX_PATH_LEN];
213*89928cc5SHong Zhang   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
214*89928cc5SHong Zhang   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
215*89928cc5SHong Zhang   char       *matformats[5];
216*89928cc5SHong Zhang   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
217*89928cc5SHong Zhang   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE;
218*89928cc5SHong Zhang   PetscViewer fd;
219*89928cc5SHong Zhang   PetscReal   starting_spmv_time = 0, *spmv_times;
220*89928cc5SHong Zhang 
221*89928cc5SHong Zhang   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
222*89928cc5SHong Zhang   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
223*89928cc5SHong Zhang   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
224*89928cc5SHong Zhang   if (!flg1) {
225*89928cc5SHong Zhang     nformats = 1;
226*89928cc5SHong Zhang     PetscCall(PetscStrallocpy("csr", &matformats[0]));
227*89928cc5SHong Zhang   }
228*89928cc5SHong Zhang   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
229*89928cc5SHong Zhang   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
230*89928cc5SHong Zhang   /* Read matrix and RHS */
231*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
232*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
233*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
234*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
235*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
236*89928cc5SHong Zhang #if !defined(PETSC_HAVE_DEVICE)
237*89928cc5SHong Zhang   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
238*89928cc5SHong Zhang #endif
239*89928cc5SHong Zhang   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
240*89928cc5SHong Zhang   if (flg3) {
241*89928cc5SHong Zhang     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
242*89928cc5SHong Zhang     PetscCall(PetscCalloc1(nmat, &spmv_times));
243*89928cc5SHong Zhang   } else if (flg2) {
244*89928cc5SHong Zhang     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
245*89928cc5SHong Zhang   } else if (flg1) {
246*89928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
247*89928cc5SHong Zhang     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
248*89928cc5SHong Zhang     PetscCall(MatSetType(A, MATAIJ));
249*89928cc5SHong Zhang     PetscCall(MatSetFromOptions(A));
250*89928cc5SHong Zhang     PetscCall(MatLoad(A, fd));
251*89928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fd));
252*89928cc5SHong Zhang   }
253*89928cc5SHong Zhang   /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
254*89928cc5SHong Zhang   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
255*89928cc5SHong Zhang   if (bflg) {
256*89928cc5SHong Zhang     PetscViewer fb;
257*89928cc5SHong Zhang     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
258*89928cc5SHong Zhang     PetscCall(VecSetFromOptions(b));
259*89928cc5SHong Zhang     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
260*89928cc5SHong Zhang     PetscCall(VecLoad(b, fb));
261*89928cc5SHong Zhang     PetscCall(PetscViewerDestroy(&fb));
262*89928cc5SHong Zhang   }
263*89928cc5SHong Zhang 
264*89928cc5SHong Zhang   for (j = 0; j < nformats; j++) {
265*89928cc5SHong Zhang     char *petscmatformat = NULL;
266*89928cc5SHong Zhang     MapToPetscMatType(matformats[j], use_gpu, &petscmatformat);
267*89928cc5SHong Zhang     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
268*89928cc5SHong Zhang     if (flg3) { // mat names specified in a JSON file
269*89928cc5SHong Zhang       for (i = 0; i < nmat; i++) {
270*89928cc5SHong Zhang         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
271*89928cc5SHong Zhang         if (!bflg) {
272*89928cc5SHong Zhang           PetscCall(MatCreateVecs(A, &b, NULL));
273*89928cc5SHong Zhang           PetscCall(VecSet(b, 1.0));
274*89928cc5SHong Zhang         }
275*89928cc5SHong Zhang         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
276*89928cc5SHong Zhang         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
277*89928cc5SHong Zhang         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
278*89928cc5SHong Zhang         PetscCall(MatDestroy(&A));
279*89928cc5SHong Zhang         if (!bflg) PetscCall(VecDestroy(&b));
280*89928cc5SHong Zhang       }
281*89928cc5SHong Zhang       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
282*89928cc5SHong Zhang       starting_spmv_time = spmv_times[nmat - 1];
283*89928cc5SHong Zhang     } else {
284*89928cc5SHong Zhang       PetscReal spmv_time;
285*89928cc5SHong Zhang       if (!bflg) {
286*89928cc5SHong Zhang         PetscCall(MatCreateVecs(A, &b, NULL));
287*89928cc5SHong Zhang         PetscCall(VecSet(b, 1.0));
288*89928cc5SHong Zhang       }
289*89928cc5SHong Zhang       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
290*89928cc5SHong Zhang       if (!bflg) PetscCall(VecDestroy(&b));
291*89928cc5SHong Zhang     }
292*89928cc5SHong Zhang     PetscCall(PetscFree(petscmatformat));
293*89928cc5SHong Zhang   }
294*89928cc5SHong Zhang   if (flg3) {
295*89928cc5SHong Zhang     for (i = 0; i < nmat; i++) {
296*89928cc5SHong Zhang       free(filenames[i]);
297*89928cc5SHong Zhang       free(groupnames[i]);
298*89928cc5SHong Zhang       free(matnames[i]);
299*89928cc5SHong Zhang     }
300*89928cc5SHong Zhang     free(filenames);
301*89928cc5SHong Zhang     free(groupnames);
302*89928cc5SHong Zhang     free(matnames);
303*89928cc5SHong Zhang     PetscCall(PetscFree(spmv_times));
304*89928cc5SHong Zhang   }
305*89928cc5SHong Zhang   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
306*89928cc5SHong Zhang   if (flg1 || flg2) PetscCall(MatDestroy(&A));
307*89928cc5SHong Zhang   if (bflg) PetscCall(VecDestroy(&b));
308*89928cc5SHong Zhang   PetscCall(PetscFinalize());
309*89928cc5SHong Zhang   return 0;
310*89928cc5SHong Zhang }
311*89928cc5SHong Zhang /*TEST
312*89928cc5SHong Zhang 
313*89928cc5SHong Zhang    build:
314*89928cc5SHong Zhang       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
315*89928cc5SHong Zhang       depends: mmloader.c mmio.c cJSON.c
316*89928cc5SHong Zhang 
317*89928cc5SHong Zhang    test:
318*89928cc5SHong Zhang       suffix: 1
319*89928cc5SHong Zhang       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
320*89928cc5SHong Zhang 
321*89928cc5SHong Zhang    test:
322*89928cc5SHong Zhang       suffix: 2
323*89928cc5SHong Zhang       args:-AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
324*89928cc5SHong Zhang       output_file: output/bench_spmv_1.out
325*89928cc5SHong Zhang       requires: cuda
326*89928cc5SHong Zhang 
327*89928cc5SHong Zhang TEST*/
328