xref: /petsc/src/mat/tests/bench_spmv.c (revision 89928cc5142e867cb7b0dd1ff74e0acffcd6b4b5)
1 static char help[] = "Driver for benchmarking SpMV.";
2 
3 #include <petscmat.h>
4 #include "cJSON.h"
5 #include "mmloader.h"
6 
/*
  Read an entire file into a NUL-terminated heap buffer.

  Returns a malloc'd string the caller must free(), or NULL if the file
  cannot be opened, sized, or the buffer cannot be allocated.
*/
char *read_file(const char *filename)
{
  char *content = NULL;
  /* open in read binary mode so ftell() gives a byte count on all platforms */
  FILE *file = fopen(filename, "rb");

  if (!file) return NULL;
  /* get the length; ftell() returns -1 on failure, which we must not cast to size_t */
  if (fseek(file, 0, SEEK_END) == 0) {
    long length = ftell(file);
    if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
      /* +1 for the terminating NUL */
      content = (char *)malloc((size_t)length + 1);
      if (content) {
        /* read the file into memory; fread may return fewer bytes than requested */
        size_t read_chars   = fread(content, sizeof(char), (size_t)length, file);
        content[read_chars] = '\0';
      }
    }
  }
  fclose(file);
  return content;
}
30 
/*
  Write a NUL-terminated string to filename, truncating any existing file.

  Best-effort: a failure to open the file is silently ignored (matching the
  original contract), but we no longer call fclose(NULL), which is undefined
  behavior when fopen fails.
*/
void write_file(const char *filename, const char *content)
{
  FILE *file = fopen(filename, "w");
  if (!file) return; /* avoid fclose(NULL) */
  fputs(content, file);
  fclose(file);
}
38 
39 int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
40 {
41   char        *content     = read_file(inputjsonfile);
42   cJSON       *matrix_json = NULL;
43   const cJSON *problem = NULL, *elem = NULL;
44   const cJSON *item = NULL;
45   char       **filenames, **groupnames, **matnames;
46   int          i, n;
47   if (!content) return 0;
48   matrix_json = cJSON_Parse(content);
49   if (!matrix_json) return 0;
50   n          = cJSON_GetArraySize(matrix_json);
51   *nmat      = n;
52   filenames  = (char **)malloc(sizeof(char *) * n);
53   groupnames = (char **)malloc(sizeof(char *) * n);
54   matnames   = (char **)malloc(sizeof(char *) * n);
55   for (i = 0; i < n; i++) {
56     elem         = cJSON_GetArrayItem(matrix_json, i);
57     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
58     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
59     strcpy(filenames[i], item->valuestring);
60     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
61     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
62     groupnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
63     strcpy(groupnames[i], item->valuestring);
64     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
65     matnames[i] = (char *)malloc(sizeof(char) * strlen(item->valuestring) + 1);
66     strcpy(matnames[i], item->valuestring);
67   }
68   cJSON_Delete(matrix_json);
69   free(content);
70   *outputfilenames  = filenames;
71   *outputgroupnames = groupnames;
72   *outputmatnames   = matnames;
73   return 0;
74 }
75 
76 int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
77 {
78   char  *content     = read_file(inputjsonfile);
79   cJSON *matrix_json = NULL;
80   cJSON *elem        = NULL;
81   int    i, n;
82   if (!content) return 0;
83   matrix_json = cJSON_Parse(content);
84   if (!matrix_json) return 0;
85   n = cJSON_GetArraySize(matrix_json);
86   for (i = 0; i < n; i++) {
87     cJSON *spmv   = NULL;
88     cJSON *format = NULL;
89     elem          = cJSON_GetArrayItem(matrix_json, i);
90     spmv          = cJSON_GetObjectItem(elem, "spmv");
91     if (spmv) {
92       format = cJSON_GetObjectItem(spmv, matformat);
93       if (format) {
94         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
95         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
96       } else {
97         format = cJSON_CreateObject();
98         cJSON_AddItemToObject(spmv, matformat, format);
99         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101       }
102     } else {
103       spmv = cJSON_CreateObject();
104       cJSON_AddItemToObject(elem, "spmv", spmv);
105       format = cJSON_CreateObject();
106       cJSON_AddItemToObject(spmv, matformat, format);
107       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109     }
110   }
111   free(content);
112   content = cJSON_Print(matrix_json);
113   write_file(inputjsonfile, content);
114   cJSON_Delete(matrix_json);
115   free(content);
116   return 0;
117 }
118 
/*
  For GPU formats, we keep two copies of the matrix on CPU and one copy on GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
  in each repetition. As a result, each MatMult call is fresh, so we can capture
  the first-time overhead (e.g. of cuSPARSE SpMV) and avoid the cache effects
  of consecutive calls.
*/
/* Run `repetitions` MatMult calls of A (converted to petscmatformat when
   use_gpu is set) on vector b.  If time is non-NULL, *time receives the
   summed wall-clock seconds of the MatMult calls alone (setup excluded). */
PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
{
  Mat            A2 = NULL;
  PetscInt       i;
  Vec            u;
  PetscLogDouble vstart = 0, vend = 0;
  PetscBool      isaijcusparse, isaijkokkos;

  /* move b to the matching device vector type so MatMult does not pay
     host-device transfers inside the timed region */
  PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
  if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
  if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
  PetscCall(VecDuplicate(b, &u)); /* u = result vector, same layout/type as b */
  if (time) *time = 0.0;
  for (i = 0; i < repetitions; i++) {
    if (use_gpu) {
      /* rebuild the device matrix from the CPU copy on every repetition (see
         the note above this function): each MatMult is then a "first" call */
      PetscCall(MatDestroy(&A2));
      PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
      PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
    } else A2 = A; /* CPU path: operate on the caller's matrix directly */
    /* Timing MatMult */
    if (time) PetscCall(PetscTime(&vstart));

    PetscCall(MatMult(A2, b, u));

    if (time) {
      PetscCall(PetscTime(&vend));
      *time += (PetscReal)(vend - vstart);
    }
  }
  PetscCall(VecDestroy(&u));
  /* A2 is a separate copy only on the GPU path; never destroy the caller's A */
  if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
  return 0;
}
160 
161 PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
162 {
163   PetscLogEvent      event;
164   PetscEventPerfInfo eventInfo;
165   //PetscReal          gpuflopRate;
166 
167   // if (matformat) {
168   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
169   // } else {
170   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
171   // }
172   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
173   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));
174 
175   PetscCall(PetscLogEventGetId("MatMult", &event));
176   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
177   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
178   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
179   if (cputime) *cputime = eventInfo.time;
180 #if defined(PETSC_HAVE_DEVICE)
181   if (gputime) *gputime = eventInfo.GpuTime;
182   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
183 #endif
184   return 0;
185 }
186 
187 PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
188 {
189   PetscBool iscsr, issell, iscsrkokkos;
190   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
191   if (iscsr) {
192     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
193     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
194   } else {
195     PetscCall(PetscStrcmp(matformat, "sell", &issell));
196     if (issell) {
197       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
198       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
199     } else {
200       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
201       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
202     }
203   }
204   return 0;
205 }
206 
int main(int argc, char **args)
{
  PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
  Mat         A;
  Vec         b;
  char        jfilename[PETSC_MAX_PATH_LEN];
  char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
  char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
  char       *matformats[5];
  char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
  PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE;
  PetscViewer fd;
  PetscReal   starting_spmv_time = 0, *spmv_times;

  /* enable per-event GPU timing and discard the default -log_view output;
     inserted before PetscInitialize so logging starts in this configuration */
  PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  /* -formats: up to 5 format names (csr, sell, csrkokkos); default is csr */
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
  if (!flg1) {
    nformats = 1;
    PetscCall(PetscStrallocpy("csr", &matformats[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
  /* Read matrix and RHS */
  /* NOTE(review): groupname/matname are read here but never used below */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
#if !defined(PETSC_HAVE_DEVICE)
  PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
#endif
  PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with the -ABIN or -AMTX or -AJSON depending on the file format");
  /* input precedence: JSON matrix list > MatrixMarket file > PETSc binary file */
  if (flg3) {
    /* NOTE(review): ParseJSON's outcome is not checked; if the JSON fails to
       parse, filenames stays NULL and the loop below would dereference it */
    ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
    PetscCall(PetscCalloc1(nmat, &spmv_times));
  } else if (flg2) {
    PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
  } else if (flg1) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, fd));
    PetscCall(PetscViewerDestroy(&fd));
  }
  /* Let the vec object trigger the first CUDA call, which takes a relatively long time to init CUDA */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
  if (bflg) {
    /* user-supplied RHS vector from a PETSc binary file */
    PetscViewer fb;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
    PetscCall(VecSetFromOptions(b));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
    PetscCall(VecLoad(b, fb));
    PetscCall(PetscViewerDestroy(&fb));
  }

  /* benchmark every requested format over every matrix */
  for (j = 0; j < nformats; j++) {
    char *petscmatformat = NULL;
    MapToPetscMatType(matformats[j], use_gpu, &petscmatformat);
    PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
    if (flg3) { // mat names specified in a JSON file
      for (i = 0; i < nmat; i++) {
        PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
        if (!bflg) {
          /* no RHS file given: use a vector of ones */
          PetscCall(MatCreateVecs(A, &b, NULL));
          PetscCall(VecSet(b, 1.0));
        }
        /* timing comes from PETSc's cumulative MatMult event, not TimedSpMV's
           own timer, hence the NULL time argument here */
        PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
        if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
        else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
        PetscCall(MatDestroy(&A));
        if (!bflg) PetscCall(VecDestroy(&b));
      }
      UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
      /* spmv_times are cumulative across formats; remember where this format ended */
      starting_spmv_time = spmv_times[nmat - 1];
    } else {
      PetscReal spmv_time;
      if (!bflg) {
        PetscCall(MatCreateVecs(A, &b, NULL));
        PetscCall(VecSet(b, 1.0));
      }
      PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
      if (!bflg) PetscCall(VecDestroy(&b));
    }
    PetscCall(PetscFree(petscmatformat));
  }
  if (flg3) {
    /* release the malloc'd name arrays from ParseJSON (owned by this function) */
    for (i = 0; i < nmat; i++) {
      free(filenames[i]);
      free(groupnames[i]);
      free(matnames[i]);
    }
    free(filenames);
    free(groupnames);
    free(matnames);
    PetscCall(PetscFree(spmv_times));
  }
  for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
  /* NOTE(review): if -AJSON was given together with -ABIN/-AMTX, A was never
     created above (the JSON branch wins) yet it is destroyed here while
     uninitialized — consider initializing A = NULL at declaration */
  if (flg1 || flg2) PetscCall(MatDestroy(&A));
  if (bflg) PetscCall(VecDestroy(&b));
  PetscCall(PetscFinalize());
  return 0;
}
311 /*TEST
312 
313    build:
314       requires:  !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
315       depends: mmloader.c mmio.c cJSON.c
316 
317    test:
318       suffix: 1
319       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx
320 
321    test:
322       suffix: 2
      args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
324       output_file: output/bench_spmv_1.out
325       requires: cuda
326 
327 TEST*/
328