xref: /petsc/src/mat/tests/bench_spmv.c (revision f640565fd0d451502e1dd140bee6310f9cf4cc3b)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read an entire file into a newly malloc'ed, NUL-terminated buffer.

  Returns NULL if the file cannot be opened, its size cannot be determined,
  or allocation fails. The caller owns the returned buffer and must free() it.
*/
char *read_file(const char *filename)
{
  FILE  *file       = NULL;
  long   length     = 0;
  char  *content    = NULL;
  size_t read_chars = 0;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (file) {
    /* get the length; guard against fseek/ftell failure (ftell returns -1 on error,
       which would otherwise become a huge size_t below) */
    if (fseek(file, 0, SEEK_END) == 0) {
      length = ftell(file);
      if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
        /* allocate content buffer (+1 byte for the terminating NUL) */
        content = (char *)malloc((size_t)length + sizeof(""));
        if (content) {
          /* read the file into memory; fread may return fewer bytes than requested */
          read_chars          = fread(content, sizeof(char), (size_t)length, file);
          content[read_chars] = '\0';
        }
      }
    }
    fclose(file);
  }
  return content;
}
30 
/*
  Write a NUL-terminated string to a file, truncating any existing content.
  Silently does nothing if the file cannot be opened.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");
  if (file) {
    fputs(content, file);
    /* only close a successfully opened stream: fclose(NULL) is undefined behavior */
    fclose(file);
  }
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
/*
  Run `repetitions` MatMult (SpMV) calls with matrix A and input vector b,
  optionally accumulating their wall-clock time into *time (conversion/setup
  work is excluded from the timing).

  For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh, so we can capture
  the first-time overhead (e.g. of CuSparse SpMV) and avoid the cache effect
  during consecutive calls.
*/
PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
{
  Mat            A2 = NULL; /* working matrix: a fresh device copy per repetition, or A itself on CPU */
  PetscInt       i;
  Vec            u; /* output vector, u = A2 * b */
  PetscLogDouble vstart = 0, vend = 0;
  PetscBool      isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;

  PetscFunctionBeginUser;
  /* Determine which device backend (if any) the requested matrix format implies */
  PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
  PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
  PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
  /* Switch the input vector to the matching device vector type */
  if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
  if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
  if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
  PetscCall(VecDuplicate(b, &u));
  if (time) *time = 0.0;
  for (i = 0; i < repetitions; i++) {
    if (use_gpu) {
      /* Rebuild the device matrix from the host copy so every MatMult is "cold" */
      PetscCall(MatDestroy(&A2));
      PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
      if (issellcuda) {
        /* device SELL formats are reached via a host SELL intermediate */
        PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
        PetscCall(MatConvert(A2, MATSELLCUDA, MAT_INPLACE_MATRIX, &A2));
      } else if (issellhip) {
        PetscCall(MatConvert(A2, MATSELL, MAT_INPLACE_MATRIX, &A2));
        PetscCall(MatConvert(A2, MATSELLHIP, MAT_INPLACE_MATRIX, &A2));
      } else {
        PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
      }
      PetscCall(MatSetFromOptions(A2)); // This allows to change parameters such as slice height in SpMV kernels for SELL
    } else A2 = A; /* CPU path: benchmark the caller's matrix directly */
    /* Timing MatMult (only the multiply itself, not the conversions above) */
    if (time) PetscCall(PetscTime(&vstart));

    PetscCall(MatMult(A2, b, u));

    if (time) {
      PetscCall(PetscTime(&vend));
      *time += (PetscReal)(vend - vstart);
    }
  }
  PetscCall(VecDestroy(&u));
  /* Only the GPU path owns A2; on the CPU path A2 aliases A, which the caller owns */
  if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
  PetscFunctionReturn(PETSC_SUCCESS);
}
174 
175 PetscErrorCode WarmUpDevice(Mat A, Vec b, const char *petscmatformat)
176 {
177   PetscLogEvent event;
178   Vec           u;
179   PetscBool     isaijcusparse, isaijhipsparse, isaijkokkos, issellcuda, issellhip;
180 
181   PetscFunctionBeginUser;
182   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
183   PetscCall(PetscStrcmp(petscmatformat, MATAIJHIPSPARSE, &isaijhipsparse));
184   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
185   PetscCall(PetscStrcmp(petscmatformat, MATSELLCUDA, &issellcuda));
186   PetscCall(PetscStrcmp(petscmatformat, MATSELLHIP, &issellhip));
187   if (!isaijcusparse && !isaijkokkos && !isaijhipsparse && !issellcuda && !issellhip) PetscFunctionReturn(PETSC_SUCCESS);
188   if (isaijcusparse || issellcuda) PetscCall(VecSetType(b, VECCUDA));
189   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
190   if (isaijhipsparse || issellhip) PetscCall(VecSetType(b, VECHIP));
191   PetscCall(VecDuplicate(b, &u));
192   PetscCall(MatConvert(A, petscmatformat, MAT_INPLACE_MATRIX, &A));
193   PetscCall(PetscLogEventGetId("MatMult", &event));
194   PetscCall(PetscLogEventDeactivatePush(event));
195   PetscCall(MatMult(A, b, u));
196   PetscCall(PetscLogEventDeactivatePop(event));
197   PetscCall(VecDestroy(&u));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
201 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
202 {
203   PetscLogEvent      event;
204   PetscEventPerfInfo eventInfo;
205   // PetscReal          gpuflopRate;
206 
207   // if (matformat) {
208   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
209   // } else {
210   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
211   // }
212   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
213   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
214 
215   PetscFunctionBeginUser;
216   PetscCall(PetscLogEventGetId("MatMult", &event));
217   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
218   // gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
219   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
220   if (cputime) *cputime = eventInfo.time;
221 #if defined(PETSC_HAVE_DEVICE)
222   if (gputime) *gputime = eventInfo.GpuTime;
223   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
224 #endif
225   PetscFunctionReturn(PETSC_SUCCESS);
226 }
227 
228 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
229 {
230   PetscBool iscsr, issell, iscsrkokkos;
231 
232   PetscFunctionBeginUser;
233   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
234   if (iscsr) {
235     if (use_gpu) {
236 #if defined(PETSC_HAVE_CUDA)
237       PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
238 #endif
239 #if defined(PETSC_HAVE_HIP)
240       PetscCall(PetscStrallocpy(MATAIJHIPSPARSE, petscmatformat));
241 #endif
242     } else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
243   } else {
244     PetscCall(PetscStrcmp(matformat, "sell", &issell));
245     if (issell) {
246       if (use_gpu) {
247 #if defined(PETSC_HAVE_CUDA)
248         PetscCall(PetscStrallocpy(MATSELLCUDA, petscmatformat));
249 #endif
250 #if defined(PETSC_HAVE_HIP)
251         PetscCall(PetscStrallocpy(MATSELLHIP, petscmatformat));
252 #endif
253       } else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
254     } else {
255       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
256       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
257     }
258   }
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
/*
  Benchmark driver. Reads one matrix (-ABIN binary, -AMTX MatrixMarket) or a
  list of matrices (-AJSON database), then times SpMV for each requested
  format (-formats, up to 5) over -repetitions runs, optionally on GPU
  (-use_gpu). With -AJSON the measured times are written back into the JSON
  database via UpdateJSON().
*/
int main(int argc, char **args)
{
  PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
  Mat         A;
  Vec         b;
  char        jfilename[PETSC_MAX_PATH_LEN];
  char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
  char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
  char       *matformats[5]; /* user-facing format names, allocated by the options system */
  char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
  char        ordering[256] = MATORDERINGRCM;
  PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
  IS          rowperm = NULL, colperm = NULL;
  PetscViewer fd;
  PetscReal   starting_spmv_time = 0, *spmv_times;

  /* enable GPU timing and discard the textual log; timings are read back
     programmatically through PetscLogSpMVTime() */
  PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
  if (!flg1) {
    /* default to a single "csr" format when -formats is not given */
    nformats = 1;
    PetscCall(PetscStrallocpy("csr", &matformats[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
  /* Read matrix and RHS; note flg1 is reused here for -ABIN after serving -formats above */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
  PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solving in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_DEVICE)
  PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
#endif
  PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
  if (flg3) {
    /* JSON mode: matrices are loaded later, one per benchmark iteration */
    ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
    PetscCall(PetscCalloc1(nmat, &spmv_times));
  } else if (flg2) {
    PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
  } else if (flg1) {
    /* load a PETSc binary matrix */
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, fd));
    PetscCall(PetscViewerDestroy(&fd));
  }
  /* NOTE(review): if only -AJSON is given, A has not been created at this point,
     so combining -AJSON with -permute would use an uninitialized Mat — confirm */
  if (permute) {
    Mat Aperm;
    PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
    PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
    PetscCall(MatDestroy(&A));
    A = Aperm; /* Replace original operator with permuted version */
  }
  /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
  if (bflg) {
    /* load the RHS vector from a PETSc binary file */
    PetscViewer fb;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
    PetscCall(VecSetFromOptions(b));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
    PetscCall(VecLoad(b, fb));
    PetscCall(PetscViewerDestroy(&fb));
  }

  for (j = 0; j < nformats; j++) {
    char *petscmatformat = NULL;
    PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
    PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
    if (flg3) { // mat names specified in a JSON file
      for (i = 0; i < nmat; i++) {
        PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
        if (!bflg) {
          /* no user RHS: use a vector of ones sized to this matrix */
          PetscCall(MatCreateVecs(A, &b, NULL));
          PetscCall(VecSet(b, 1.0));
        }
        if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
        PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
        /* spmv_times[i] receives the CUMULATIVE logged MatMult time so far;
           UpdateJSON() converts the differences into per-matrix averages */
        if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
        else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
        PetscCall(MatDestroy(&A));
        if (!bflg) PetscCall(VecDestroy(&b));
      }
      UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
      /* carry the cumulative baseline into the next format's measurements */
      starting_spmv_time = spmv_times[nmat - 1];
    } else {
      PetscReal spmv_time;
      if (!bflg) {
        PetscCall(MatCreateVecs(A, &b, NULL));
        PetscCall(VecSet(b, 1.0));
      }
      if (use_gpu) PetscCall(WarmUpDevice(A, b, petscmatformat));
      PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
      if (!bflg) PetscCall(VecDestroy(&b));
    }
    PetscCall(PetscFree(petscmatformat));
  }
  /* release JSON-mode name arrays (malloc'ed by ParseJSON, hence free()) */
  if (flg3) {
    for (i = 0; i < nmat; i++) {
      free(filenames[i]);
      free(groupnames[i]);
      free(matnames[i]);
    }
    free(filenames);
    free(groupnames);
    free(matnames);
    PetscCall(PetscFree(spmv_times));
  }
  for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
  if (flg1 || flg2) PetscCall(MatDestroy(&A));
  if (bflg) PetscCall(VecDestroy(&b));
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(PetscFinalize());
  return 0;
}
382 /*TEST
383 
384    build:
385       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
386       depends: mmloader.c mmio.c cJSON.c
387 
388    test:
389       suffix: 1
390       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
391 
392    test:
393       suffix: 2
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
395       output_file: output/bench_spmv_1.out
396       requires: cuda
397 
398    test:
399       suffix: 3
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
401       output_file: output/bench_spmv_1.out
402       requires: hip
403 
404 TEST*/
405