xref: /petsc/src/sys/logging/handler/impls/nested/xmlviewer.c (revision b93211885051a68d5a4e2963b141f71fa7a24c4c)
1*b9321188SToby Isaac /*************************************************************************************
2*b9321188SToby Isaac  *    M A R I T I M E  R E S E A R C H  I N S T I T U T E  N E T H E R L A N D S     *
3*b9321188SToby Isaac  *************************************************************************************
4*b9321188SToby Isaac  *    authors: Koos Huijssen, Christiaan M. Klaij                                    *
5*b9321188SToby Isaac  *************************************************************************************
6*b9321188SToby Isaac  *    content: Viewer for writing XML output                                         *
7*b9321188SToby Isaac  *************************************************************************************/
8*b9321188SToby Isaac #include <petscviewer.h>
9*b9321188SToby Isaac #include <petsc/private/logimpl.h>
10*b9321188SToby Isaac #include <petsc/private/fortranimpl.h>
11*b9321188SToby Isaac #include "xmlviewer.h"
12*b9321188SToby Isaac #include "lognested.h"
13*b9321188SToby Isaac 
14*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLStartSection(PetscViewer viewer, const char *name, const char *desc)
15*b9321188SToby Isaac {
16*b9321188SToby Isaac   PetscInt XMLSectionDepthPetsc;
17*b9321188SToby Isaac   int      XMLSectionDepth;
18*b9321188SToby Isaac 
19*b9321188SToby Isaac   PetscFunctionBegin;
20*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
21*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
22*b9321188SToby Isaac   if (!desc) {
23*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>\n", 2 * XMLSectionDepth, "", name));
24*b9321188SToby Isaac   } else {
25*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">\n", 2 * XMLSectionDepth, "", name, desc));
26*b9321188SToby Isaac   }
27*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPushTab(viewer));
28*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
29*b9321188SToby Isaac }
30*b9321188SToby Isaac 
31*b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */
32*b9321188SToby Isaac static PetscErrorCode PetscViewerInitASCII_XML(PetscViewer viewer)
33*b9321188SToby Isaac {
34*b9321188SToby Isaac   MPI_Comm comm;
35*b9321188SToby Isaac   char     PerfScript[PETSC_MAX_PATH_LEN + 40];
36*b9321188SToby Isaac 
37*b9321188SToby Isaac   PetscFunctionBegin;
38*b9321188SToby Isaac   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
39*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPrintf(viewer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"));
40*b9321188SToby Isaac   PetscCall(PetscStrreplace(comm, "<?xml-stylesheet type=\"text/xsl\" href=\"performance_xml2html.xsl\"?>", PerfScript, sizeof(PerfScript)));
41*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", PerfScript));
42*b9321188SToby Isaac   PetscCall(PetscViewerXMLStartSection(viewer, "root", NULL));
43*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
44*b9321188SToby Isaac }
45*b9321188SToby Isaac 
46*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLEndSection(PetscViewer viewer, const char *name)
47*b9321188SToby Isaac {
48*b9321188SToby Isaac   PetscInt XMLSectionDepthPetsc;
49*b9321188SToby Isaac   int      XMLSectionDepth;
50*b9321188SToby Isaac 
51*b9321188SToby Isaac   PetscFunctionBegin;
52*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
53*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
54*b9321188SToby Isaac   if (XMLSectionDepth > 0) PetscCall(PetscViewerASCIIPopTab(viewer));
55*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
56*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
57*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPrintf(viewer, "%*s</%s>\n", 2 * XMLSectionDepth, "", name));
58*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
59*b9321188SToby Isaac }
60*b9321188SToby Isaac 
61*b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */
62*b9321188SToby Isaac static PetscErrorCode PetscViewerFinalASCII_XML(PetscViewer viewer)
63*b9321188SToby Isaac {
64*b9321188SToby Isaac   PetscFunctionBegin;
65*b9321188SToby Isaac   PetscCall(PetscViewerXMLEndSection(viewer, "root"));
66*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
67*b9321188SToby Isaac }
68*b9321188SToby Isaac 
69*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutString(PetscViewer viewer, const char *name, const char *desc, const char *value)
70*b9321188SToby Isaac {
71*b9321188SToby Isaac   PetscInt XMLSectionDepthPetsc;
72*b9321188SToby Isaac   int      XMLSectionDepth;
73*b9321188SToby Isaac 
74*b9321188SToby Isaac   PetscFunctionBegin;
75*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
76*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
77*b9321188SToby Isaac   if (!desc) {
78*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, value, name));
79*b9321188SToby Isaac   } else {
80*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%s</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name));
81*b9321188SToby Isaac   }
82*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
83*b9321188SToby Isaac }
84*b9321188SToby Isaac 
85*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutInt(PetscViewer viewer, const char *name, const char *desc, int value)
86*b9321188SToby Isaac {
87*b9321188SToby Isaac   PetscInt XMLSectionDepthPetsc;
88*b9321188SToby Isaac   int      XMLSectionDepth;
89*b9321188SToby Isaac 
90*b9321188SToby Isaac   PetscFunctionBegin;
91*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
92*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
93*b9321188SToby Isaac   if (!desc) {
94*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%d</%s>\n", 2 * XMLSectionDepth, "", name, value, name));
95*b9321188SToby Isaac   } else {
96*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%d</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name));
97*b9321188SToby Isaac   }
98*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
99*b9321188SToby Isaac }
100*b9321188SToby Isaac 
101*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutDouble(PetscViewer viewer, const char *name, PetscLogDouble value, const char *format)
102*b9321188SToby Isaac {
103*b9321188SToby Isaac   PetscInt XMLSectionDepthPetsc;
104*b9321188SToby Isaac   int      XMLSectionDepth;
105*b9321188SToby Isaac   char     buffer[1024];
106*b9321188SToby Isaac 
107*b9321188SToby Isaac   PetscFunctionBegin;
108*b9321188SToby Isaac   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
109*b9321188SToby Isaac   XMLSectionDepth = (int)XMLSectionDepthPetsc;
110*b9321188SToby Isaac   PetscCall(PetscSNPrintf(buffer, sizeof(buffer), "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, format, name));
111*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPrintf(viewer, buffer, value));
112*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
113*b9321188SToby Isaac }
114*b9321188SToby Isaac 
115*b9321188SToby Isaac static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
116*b9321188SToby Isaac {
117*b9321188SToby Isaac   char        arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
118*b9321188SToby Isaac   char        version[256], buildoptions[128] = "";
119*b9321188SToby Isaac   PetscMPIInt size;
120*b9321188SToby Isaac   size_t      len;
121*b9321188SToby Isaac 
122*b9321188SToby Isaac   PetscFunctionBegin;
123*b9321188SToby Isaac   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
124*b9321188SToby Isaac   PetscCall(PetscGetArchType(arch, sizeof(arch)));
125*b9321188SToby Isaac   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
126*b9321188SToby Isaac   PetscCall(PetscGetUserName(username, sizeof(username)));
127*b9321188SToby Isaac   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
128*b9321188SToby Isaac   PetscCall(PetscGetDate(date, sizeof(date)));
129*b9321188SToby Isaac   PetscCall(PetscGetVersion(version, sizeof(version)));
130*b9321188SToby Isaac 
131*b9321188SToby Isaac   PetscCall(PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification"));
132*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "executable", "Executable", pname));
133*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "architecture", "Architecture", arch));
134*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "hostname", "Host", hostname));
135*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutInt(viewer, "nprocesses", "Number of processes", size));
136*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "user", "Run by user", username));
137*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "date", "Started at", date));
138*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "petscrelease", "Petsc Release", version));
139*b9321188SToby Isaac 
140*b9321188SToby Isaac   if (PetscDefined(USE_DEBUG)) PetscCall(PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions)));
141*b9321188SToby Isaac   if (PetscDefined(USE_COMPLEX)) PetscCall(PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions)));
142*b9321188SToby Isaac   if (PetscDefined(USE_REAL_SINGLE)) {
143*b9321188SToby Isaac     PetscCall(PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions)));
144*b9321188SToby Isaac   } else if (PetscDefined(USE_REAL___FLOAT128)) {
145*b9321188SToby Isaac     PetscCall(PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions)));
146*b9321188SToby Isaac   } else if (PetscDefined(USE_REAL___FP16)) {
147*b9321188SToby Isaac     PetscCall(PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions)));
148*b9321188SToby Isaac   }
149*b9321188SToby Isaac   if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions)));
150*b9321188SToby Isaac #if defined(__cplusplus)
151*b9321188SToby Isaac   PetscCall(PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions)));
152*b9321188SToby Isaac #endif
153*b9321188SToby Isaac   PetscCall(PetscStrlen(buildoptions, &len));
154*b9321188SToby Isaac   if (len) PetscCall(PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions));
155*b9321188SToby Isaac   PetscCall(PetscViewerXMLEndSection(viewer, "runspecification"));
156*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
157*b9321188SToby Isaac }
158*b9321188SToby Isaac 
159*b9321188SToby Isaac #if PetscDefined(USE_LOG)
160*b9321188SToby Isaac static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total)
161*b9321188SToby Isaac {
162*b9321188SToby Isaac   PetscLogDouble min, tot, ratio, avg;
163*b9321188SToby Isaac   MPI_Comm       comm;
164*b9321188SToby Isaac   PetscMPIInt    rank, size;
165*b9321188SToby Isaac   PetscLogDouble valrank[2], max[2];
166*b9321188SToby Isaac 
167*b9321188SToby Isaac   PetscFunctionBegin;
168*b9321188SToby Isaac   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
169*b9321188SToby Isaac   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
170*b9321188SToby Isaac   PetscCallMPI(MPI_Comm_rank(comm, &rank));
171*b9321188SToby Isaac 
172*b9321188SToby Isaac   valrank[0] = local_val;
173*b9321188SToby Isaac   valrank[1] = (PetscLogDouble)rank;
174*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
175*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
176*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
177*b9321188SToby Isaac   avg = tot / ((PetscLogDouble)size);
178*b9321188SToby Isaac   if (min != 0.0) ratio = max[0] / min;
179*b9321188SToby Isaac   else ratio = 0.0;
180*b9321188SToby Isaac 
181*b9321188SToby Isaac   PetscCall(PetscViewerXMLStartSection(viewer, name, desc));
182*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutDouble(viewer, "max", max[0], "%e"));
183*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutInt(viewer, "maxrank", "rank at which max was found", (PetscMPIInt)max[1]));
184*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutDouble(viewer, "ratio", ratio, "%f"));
185*b9321188SToby Isaac   if (print_average) PetscCall(PetscViewerXMLPutDouble(viewer, "average", avg, "%e"));
186*b9321188SToby Isaac   if (print_total) PetscCall(PetscViewerXMLPutDouble(viewer, "total", tot, "%e"));
187*b9321188SToby Isaac   PetscCall(PetscViewerXMLEndSection(viewer, name));
188*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
189*b9321188SToby Isaac }
190*b9321188SToby Isaac 
191*b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of
192*b9321188SToby Isaac  *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
193*b9321188SToby Isaac  */
194*b9321188SToby Isaac static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime, PetscLogHandler default_handler)
195*b9321188SToby Isaac {
196*b9321188SToby Isaac   PetscLogDouble  flops, mem, red, mess;
197*b9321188SToby Isaac   PetscInt        num_objects;
198*b9321188SToby Isaac   const PetscBool print_total_yes = PETSC_TRUE, print_total_no = PETSC_FALSE, print_average_no = PETSC_FALSE, print_average_yes = PETSC_TRUE;
199*b9321188SToby Isaac 
200*b9321188SToby Isaac   PetscFunctionBegin;
201*b9321188SToby Isaac   /* Must preserve reduction count before we go on */
202*b9321188SToby Isaac   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
203*b9321188SToby Isaac 
204*b9321188SToby Isaac   /* Calculate summary information */
205*b9321188SToby Isaac   PetscCall(PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance"));
206*b9321188SToby Isaac 
207*b9321188SToby Isaac   /*   Time */
208*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no));
209*b9321188SToby Isaac 
210*b9321188SToby Isaac   /*   Objects */
211*b9321188SToby Isaac   PetscCall(PetscLogHandlerDefaultGetNumObjects(default_handler, &num_objects));
212*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble)num_objects, print_average_yes, print_total_no));
213*b9321188SToby Isaac 
214*b9321188SToby Isaac   /*   Flop */
215*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops / 1.0E6, print_average_yes, print_total_yes));
216*b9321188SToby Isaac 
217*b9321188SToby Isaac   /*   Flop/sec -- Must talk to Barry here */
218*b9321188SToby Isaac   if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
219*b9321188SToby Isaac   else flops = 0.0;
220*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops / 1.0E6, print_average_yes, print_total_yes));
221*b9321188SToby Isaac 
222*b9321188SToby Isaac   /*   Memory */
223*b9321188SToby Isaac   PetscCall(PetscMallocGetMaximumUsage(&mem));
224*b9321188SToby Isaac   if (mem > 0.0) PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem / 1024.0 / 1024.0, print_average_yes, print_total_yes));
225*b9321188SToby Isaac   /*   Messages */
226*b9321188SToby Isaac   mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
227*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes));
228*b9321188SToby Isaac 
229*b9321188SToby Isaac   /*   Message Volume */
230*b9321188SToby Isaac   mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
231*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess / 1024.0 / 1024.0, print_average_yes, print_total_yes));
232*b9321188SToby Isaac 
233*b9321188SToby Isaac   /*   Reductions */
234*b9321188SToby Isaac   PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red, print_average_no, print_total_no));
235*b9321188SToby Isaac   PetscCall(PetscViewerXMLEndSection(viewer, "globalperformance"));
236*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
237*b9321188SToby Isaac }
238*b9321188SToby Isaac #endif
239*b9321188SToby Isaac 
240*b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of
241*b9321188SToby Isaac  *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
242*b9321188SToby Isaac  */
243*b9321188SToby Isaac static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble value, PetscLogDouble minthreshold, PetscLogDouble maxthreshold, PetscLogDouble minmaxtreshold)
244*b9321188SToby Isaac {
245*b9321188SToby Isaac   MPI_Comm       comm; /* MPI communicator in reduction */
246*b9321188SToby Isaac   PetscMPIInt    rank; /* rank of this process */
247*b9321188SToby Isaac   PetscLogDouble val_in[2], max[2], min[2];
248*b9321188SToby Isaac   PetscLogDouble minvalue, maxvalue, tot;
249*b9321188SToby Isaac   PetscMPIInt    size;
250*b9321188SToby Isaac   PetscMPIInt    minLoc, maxLoc;
251*b9321188SToby Isaac 
252*b9321188SToby Isaac   PetscFunctionBegin;
253*b9321188SToby Isaac   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
254*b9321188SToby Isaac   PetscCallMPI(MPI_Comm_size(comm, &size));
255*b9321188SToby Isaac   PetscCallMPI(MPI_Comm_rank(comm, &rank));
256*b9321188SToby Isaac   val_in[0] = value;
257*b9321188SToby Isaac   val_in[1] = (PetscLogDouble)rank;
258*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
259*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm));
260*b9321188SToby Isaac   maxvalue = max[0];
261*b9321188SToby Isaac   maxLoc   = (PetscMPIInt)max[1];
262*b9321188SToby Isaac   minvalue = min[0];
263*b9321188SToby Isaac   minLoc   = (PetscMPIInt)min[1];
264*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
265*b9321188SToby Isaac 
266*b9321188SToby Isaac   if (maxvalue < maxthreshold && minvalue >= minthreshold) {
267*b9321188SToby Isaac     /* One call per parent or NO value: don't print */
268*b9321188SToby Isaac   } else {
269*b9321188SToby Isaac     PetscCall(PetscViewerXMLStartSection(viewer, name, NULL));
270*b9321188SToby Isaac     if (maxvalue > minvalue * minmaxtreshold) {
271*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutDouble(viewer, "avgvalue", tot / size, "%g"));
272*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutDouble(viewer, "minvalue", minvalue, "%g"));
273*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutDouble(viewer, "maxvalue", maxvalue, "%g"));
274*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutInt(viewer, "minloc", NULL, minLoc));
275*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutInt(viewer, "maxloc", NULL, maxLoc));
276*b9321188SToby Isaac     } else {
277*b9321188SToby Isaac       PetscCall(PetscViewerXMLPutDouble(viewer, "value", tot / size, "%g"));
278*b9321188SToby Isaac     }
279*b9321188SToby Isaac     PetscCall(PetscViewerXMLEndSection(viewer, name));
280*b9321188SToby Isaac   }
281*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
282*b9321188SToby Isaac }
283*b9321188SToby Isaac 
284*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer, const PetscEventPerfInfo *perfInfo, int childCount, int parentCount, const char *name, PetscLogDouble totalTime)
285*b9321188SToby Isaac {
286*b9321188SToby Isaac   PetscLogDouble time = perfInfo->time;
287*b9321188SToby Isaac   PetscLogDouble timeMx;
288*b9321188SToby Isaac   MPI_Comm       comm;
289*b9321188SToby Isaac 
290*b9321188SToby Isaac   PetscFunctionBegin;
291*b9321188SToby Isaac   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
292*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
293*b9321188SToby Isaac   PetscCall(PetscViewerXMLPutString(viewer, "name", NULL, name));
294*b9321188SToby Isaac   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "time", time / totalTime * 100.0, 0, 0, 1.02));
295*b9321188SToby Isaac   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount > 0 ? ((PetscLogDouble)childCount) / ((PetscLogDouble)parentCount) : 1.0, 0.99, 1.01, 1.02));
296*b9321188SToby Isaac   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time >= timeMx * 0.001 ? 1e-6 * perfInfo->flops / time : 0, 0, 0.01, 1.05));
297*b9321188SToby Isaac   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mbps", time >= timeMx * 0.001 ? perfInfo->messageLength / (1024 * 1024 * time) : 0, 0, 0.01, 1.05));
298*b9321188SToby Isaac   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time >= timeMx * 0.001 ? perfInfo->numReductions / time : 0, 0, 0.01, 1.05));
299*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
300*b9321188SToby Isaac }
301*b9321188SToby Isaac 
302*b9321188SToby Isaac static PetscErrorCode PetscNestedNameGetBase(const char name[], const char *base[])
303*b9321188SToby Isaac {
304*b9321188SToby Isaac   size_t n;
305*b9321188SToby Isaac   PetscFunctionBegin;
306*b9321188SToby Isaac   PetscCall(PetscStrlen(name, &n));
307*b9321188SToby Isaac   while (n > 0 && name[n - 1] != ';') n--;
308*b9321188SToby Isaac   *base = &name[n];
309*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
310*b9321188SToby Isaac }
311*b9321188SToby Isaac 
312*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, double total_time, double threshold_time, const PetscNestedEventNode *parent_node, PetscEventPerfInfo *parent_info, const PetscNestedEventNode tree[], PetscEventPerfInfo perf[], PetscLogNestedType type, PetscBool print_events)
313*b9321188SToby Isaac {
314*b9321188SToby Isaac   PetscInt           num_children = 0, num_printed;
315*b9321188SToby Isaac   PetscInt           num_nodes    = parent_node->num_descendants;
316*b9321188SToby Isaac   PetscInt          *perm;
317*b9321188SToby Isaac   PetscReal         *times;
318*b9321188SToby Isaac   PetscEventPerfInfo other;
319*b9321188SToby Isaac 
320*b9321188SToby Isaac   PetscFunctionBegin;
321*b9321188SToby Isaac   for (PetscInt node = 0; node < num_nodes; node += 1 + tree[node].num_descendants) {
322*b9321188SToby Isaac     PetscAssert(tree[node].parent == parent_node->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed tree invariant");
323*b9321188SToby Isaac     num_children++;
324*b9321188SToby Isaac   }
325*b9321188SToby Isaac   num_printed = num_children;
326*b9321188SToby Isaac   PetscCall(PetscMemzero(&other, sizeof(other)));
327*b9321188SToby Isaac   PetscCall(PetscMalloc2(num_children + 2, &times, num_children + 2, &perm));
328*b9321188SToby Isaac   for (PetscInt i = 0, node = 0; node < num_nodes; i++, node += 1 + tree[node].num_descendants) {
329*b9321188SToby Isaac     PetscLogDouble child_time = perf[node].time;
330*b9321188SToby Isaac 
331*b9321188SToby Isaac     perm[i] = node;
332*b9321188SToby Isaac     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &child_time, 1, MPI_DOUBLE, MPI_MAX, PetscObjectComm((PetscObject)viewer)));
333*b9321188SToby Isaac     times[i] = -child_time;
334*b9321188SToby Isaac 
335*b9321188SToby Isaac     parent_info->time -= perf[node].time;
336*b9321188SToby Isaac     parent_info->flops -= perf[node].flops;
337*b9321188SToby Isaac     parent_info->numMessages -= perf[node].numMessages;
338*b9321188SToby Isaac     parent_info->messageLength -= perf[node].messageLength;
339*b9321188SToby Isaac     parent_info->numReductions -= perf[node].numReductions;
340*b9321188SToby Isaac     if (child_time < threshold_time) {
341*b9321188SToby Isaac       PetscEventPerfInfo *add_to = (type == PETSC_LOG_NESTED_XML) ? &other : parent_info;
342*b9321188SToby Isaac 
343*b9321188SToby Isaac       add_to->time += perf[node].time;
344*b9321188SToby Isaac       add_to->flops += perf[node].flops;
345*b9321188SToby Isaac       add_to->numMessages += perf[node].numMessages;
346*b9321188SToby Isaac       add_to->messageLength += perf[node].messageLength;
347*b9321188SToby Isaac       add_to->numReductions += perf[node].numReductions;
348*b9321188SToby Isaac       add_to->count += perf[node].count;
349*b9321188SToby Isaac       num_printed--;
350*b9321188SToby Isaac     }
351*b9321188SToby Isaac   }
352*b9321188SToby Isaac   perm[num_children]      = -1;
353*b9321188SToby Isaac   times[num_children]     = -parent_info->time;
354*b9321188SToby Isaac   perm[num_children + 1]  = -2;
355*b9321188SToby Isaac   times[num_children + 1] = -other.time;
356*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &times[num_children], 2, MPI_DOUBLE, MPI_MIN, PetscObjectComm((PetscObject)viewer)));
357*b9321188SToby Isaac   if (type == PETSC_LOG_NESTED_FLAMEGRAPH) {
358*b9321188SToby Isaac     /* The output is given as an integer in microseconds because otherwise the file cannot be read
359*b9321188SToby Isaac      * by apps such as speedscope (https://speedscope.app/). */
360*b9321188SToby Isaac     PetscCall(PetscViewerASCIIPrintf(viewer, "%s %" PetscInt64_FMT "\n", parent_node->name, (PetscInt64)(-times[num_children] * 1e6)));
361*b9321188SToby Isaac   }
362*b9321188SToby Isaac   if (other.time > 0.0) num_printed++;
363*b9321188SToby Isaac   // number of items other than "self" that will be printed
364*b9321188SToby Isaac   if (num_printed == 0) {
365*b9321188SToby Isaac     PetscCall(PetscFree2(times, perm));
366*b9321188SToby Isaac     PetscFunctionReturn(PETSC_SUCCESS);
367*b9321188SToby Isaac   }
368*b9321188SToby Isaac   // sort descending by time
369*b9321188SToby Isaac   PetscCall(PetscSortRealWithArrayInt(num_children + 2, times, perm));
370*b9321188SToby Isaac   if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLStartSection(viewer, "events", NULL));
371*b9321188SToby Isaac   for (PetscInt i = 0; i < num_children + 2; i++) {
372*b9321188SToby Isaac     PetscInt       node       = perm[i];
373*b9321188SToby Isaac     PetscLogDouble child_time = -times[i];
374*b9321188SToby Isaac 
375*b9321188SToby Isaac     if (child_time >= threshold_time || (node < 0 && child_time > 0.0)) {
376*b9321188SToby Isaac       if (type == PETSC_LOG_NESTED_XML) {
377*b9321188SToby Isaac         PetscCall(PetscViewerXMLStartSection(viewer, "event", NULL));
378*b9321188SToby Isaac         if (node == -1) {
379*b9321188SToby Isaac           PetscCall(PetscLogNestedTreePrintLine(viewer, parent_info, 0, 0, "self", total_time));
380*b9321188SToby Isaac         } else if (node == -2) {
381*b9321188SToby Isaac           PetscCall(PetscLogNestedTreePrintLine(viewer, &other, other.count, parent_info->count, "other", total_time));
382*b9321188SToby Isaac         } else {
383*b9321188SToby Isaac           const char *base_name = NULL;
384*b9321188SToby Isaac           PetscCall(PetscNestedNameGetBase(tree[node].name, &base_name));
385*b9321188SToby Isaac           PetscCall(PetscLogNestedTreePrintLine(viewer, &perf[node], perf[node].count, parent_info->count, base_name, total_time));
386*b9321188SToby Isaac           PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_TRUE));
387*b9321188SToby Isaac         }
388*b9321188SToby Isaac         PetscCall(PetscViewerXMLEndSection(viewer, "event"));
389*b9321188SToby Isaac       } else if (node >= 0) {
390*b9321188SToby Isaac         PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_FALSE));
391*b9321188SToby Isaac       }
392*b9321188SToby Isaac     }
393*b9321188SToby Isaac   }
394*b9321188SToby Isaac   if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLEndSection(viewer, "events"));
395*b9321188SToby Isaac   PetscCall(PetscFree2(times, perm));
396*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
397*b9321188SToby Isaac }
398*b9321188SToby Isaac 
399*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, PetscLogDouble threshold, PetscLogNestedType type)
400*b9321188SToby Isaac {
401*b9321188SToby Isaac   PetscNestedEventNode *main_stage;
402*b9321188SToby Isaac   PetscNestedEventNode *tree_rem;
403*b9321188SToby Isaac   PetscEventPerfInfo   *main_stage_perf;
404*b9321188SToby Isaac   PetscEventPerfInfo   *perf_rem;
405*b9321188SToby Isaac   PetscLogDouble        time;
406*b9321188SToby Isaac   PetscLogDouble        threshold_time;
407*b9321188SToby Isaac 
408*b9321188SToby Isaac   PetscFunctionBegin;
409*b9321188SToby Isaac   main_stage      = &tree->nodes[0];
410*b9321188SToby Isaac   tree_rem        = &tree->nodes[1];
411*b9321188SToby Isaac   main_stage_perf = &tree->perf[0];
412*b9321188SToby Isaac   perf_rem        = &tree->perf[1];
413*b9321188SToby Isaac   time            = main_stage_perf->time;
414*b9321188SToby Isaac   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &time, 1, MPI_DOUBLE, MPI_MAX, tree->comm));
415*b9321188SToby Isaac   /* Print (or ignore) the children in ascending order of total time */
416*b9321188SToby Isaac   if (type == PETSC_LOG_NESTED_XML) {
417*b9321188SToby Isaac     PetscCall(PetscViewerXMLStartSection(viewer, "timertree", "Timings tree"));
418*b9321188SToby Isaac     PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", time, "%f"));
419*b9321188SToby Isaac     PetscCall(PetscViewerXMLPutDouble(viewer, "timethreshold", threshold, "%f"));
420*b9321188SToby Isaac   }
421*b9321188SToby Isaac   threshold_time = time * (threshold / 100.0 + 1.e-12);
422*b9321188SToby Isaac   PetscCall(PetscLogNestedTreePrint(viewer, time, threshold_time, main_stage, main_stage_perf, tree_rem, perf_rem, type, PETSC_FALSE));
423*b9321188SToby Isaac   if (type == PETSC_LOG_NESTED_XML) PetscCall(PetscViewerXMLEndSection(viewer, "timertree"));
424*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
425*b9321188SToby Isaac }
426*b9321188SToby Isaac 
427*b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_XML(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer)
428*b9321188SToby Isaac {
429*b9321188SToby Isaac   PetscFunctionBegin;
430*b9321188SToby Isaac   PetscCall(PetscViewerInitASCII_XML(viewer));
431*b9321188SToby Isaac   PetscCall(PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n"));
432*b9321188SToby Isaac   PetscCall(PetscViewerXMLStartSection(viewer, "petscroot", NULL));
433*b9321188SToby Isaac 
434*b9321188SToby Isaac   // Print global information about this run
435*b9321188SToby Isaac   PetscCall(PetscPrintExeSpecs(viewer));
436*b9321188SToby Isaac 
437*b9321188SToby Isaac #if PetscDefined(USE_LOG)
438*b9321188SToby Isaac   {
439*b9321188SToby Isaac     PetscEventPerfInfo *main_stage_info;
440*b9321188SToby Isaac     PetscLogDouble      locTotalTime;
441*b9321188SToby Isaac 
442*b9321188SToby Isaac     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(nested->handler, 0, 0, &main_stage_info));
443*b9321188SToby Isaac     locTotalTime = main_stage_info->time;
444*b9321188SToby Isaac     PetscCall(PetscPrintGlobalPerformance(viewer, locTotalTime, nested->handler));
445*b9321188SToby Isaac   }
446*b9321188SToby Isaac #endif
447*b9321188SToby Isaac   PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_XML));
448*b9321188SToby Isaac   PetscCall(PetscViewerXMLEndSection(viewer, "petscroot"));
449*b9321188SToby Isaac   PetscCall(PetscViewerFinalASCII_XML(viewer));
450*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
451*b9321188SToby Isaac }
452*b9321188SToby Isaac 
453*b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_Flamegraph(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer)
454*b9321188SToby Isaac {
455*b9321188SToby Isaac   PetscFunctionBegin;
456*b9321188SToby Isaac   PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_FLAMEGRAPH));
457*b9321188SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
458*b9321188SToby Isaac }
459