1b9321188SToby Isaac /************************************************************************************* 2b9321188SToby Isaac * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S * 3b9321188SToby Isaac ************************************************************************************* 4b9321188SToby Isaac * authors: Koos Huijssen, Christiaan M. Klaij * 5b9321188SToby Isaac ************************************************************************************* 6b9321188SToby Isaac * content: Viewer for writing XML output * 7b9321188SToby Isaac *************************************************************************************/ 8b9321188SToby Isaac #include <petscviewer.h> 9b9321188SToby Isaac #include <petsc/private/logimpl.h> 10b9321188SToby Isaac #include "xmlviewer.h" 11b9321188SToby Isaac #include "lognested.h" 12b9321188SToby Isaac 13b9321188SToby Isaac static PetscErrorCode PetscViewerXMLStartSection(PetscViewer viewer, const char *name, const char *desc) 14b9321188SToby Isaac { 15b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 16b9321188SToby Isaac int XMLSectionDepth; 17b9321188SToby Isaac 18b9321188SToby Isaac PetscFunctionBegin; 19b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 20835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 21b9321188SToby Isaac if (!desc) { 22b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>\n", 2 * XMLSectionDepth, "", name)); 23b9321188SToby Isaac } else { 24b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">\n", 2 * XMLSectionDepth, "", name, desc)); 25b9321188SToby Isaac } 26b9321188SToby Isaac PetscCall(PetscViewerASCIIPushTab(viewer)); 27b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 28b9321188SToby Isaac } 29b9321188SToby Isaac 30b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */ 31b9321188SToby Isaac static PetscErrorCode PetscViewerInitASCII_XML(PetscViewer viewer) 32b9321188SToby Isaac { 33b9321188SToby Isaac MPI_Comm comm; 34b9321188SToby Isaac char PerfScript[PETSC_MAX_PATH_LEN + 40]; 35b9321188SToby Isaac 36b9321188SToby Isaac PetscFunctionBegin; 37b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 38b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")); 39b9321188SToby Isaac PetscCall(PetscStrreplace(comm, "<?xml-stylesheet type=\"text/xsl\" href=\"performance_xml2html.xsl\"?>", PerfScript, sizeof(PerfScript))); 40b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", PerfScript)); 41b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "root", NULL)); 42b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 43b9321188SToby Isaac } 44b9321188SToby Isaac 45b9321188SToby Isaac static PetscErrorCode PetscViewerXMLEndSection(PetscViewer viewer, const char *name) 46b9321188SToby Isaac { 47b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 48b9321188SToby Isaac int XMLSectionDepth; 49b9321188SToby Isaac 50b9321188SToby Isaac PetscFunctionBegin; 51b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 52835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 53b9321188SToby Isaac if (XMLSectionDepth > 0) PetscCall(PetscViewerASCIIPopTab(viewer)); 54b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 55835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 56b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s</%s>\n", 2 * XMLSectionDepth, "", name)); 57b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 58b9321188SToby Isaac } 59b9321188SToby Isaac 60b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */ 61b9321188SToby Isaac static PetscErrorCode PetscViewerFinalASCII_XML(PetscViewer viewer) 62b9321188SToby Isaac { 63b9321188SToby Isaac PetscFunctionBegin; 64b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "root")); 65b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 66b9321188SToby Isaac } 67b9321188SToby Isaac 68b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutString(PetscViewer viewer, const char *name, const char *desc, const char *value) 69b9321188SToby Isaac { 70b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 71b9321188SToby Isaac int XMLSectionDepth; 72b9321188SToby Isaac 73b9321188SToby Isaac PetscFunctionBegin; 74b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 75835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 76b9321188SToby Isaac if (!desc) { 77b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, value, name)); 78b9321188SToby Isaac } else { 79b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%s</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name)); 80b9321188SToby Isaac } 81b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 82b9321188SToby Isaac } 83b9321188SToby Isaac 84b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutInt(PetscViewer viewer, const char *name, const char *desc, int value) 85b9321188SToby Isaac { 86b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 87b9321188SToby Isaac int XMLSectionDepth; 88b9321188SToby Isaac 89b9321188SToby Isaac PetscFunctionBegin; 90b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 91835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 92b9321188SToby Isaac if (!desc) { 93b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%d</%s>\n", 2 * XMLSectionDepth, "", name, value, name)); 94b9321188SToby Isaac } else { 95b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%d</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name)); 96b9321188SToby Isaac } 97b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 98b9321188SToby Isaac } 99b9321188SToby Isaac 100b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutDouble(PetscViewer viewer, const char *name, PetscLogDouble value, const char *format) 101b9321188SToby Isaac { 102b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 103b9321188SToby Isaac int XMLSectionDepth; 104b9321188SToby Isaac char buffer[1024]; 105b9321188SToby Isaac 106b9321188SToby Isaac PetscFunctionBegin; 107b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 108835f2295SStefano Zampini PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth)); 109b9321188SToby Isaac PetscCall(PetscSNPrintf(buffer, sizeof(buffer), "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, format, name)); 110b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, buffer, value)); 111b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 112b9321188SToby Isaac } 113b9321188SToby Isaac 114b9321188SToby Isaac static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer) 115b9321188SToby Isaac { 116b9321188SToby Isaac char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 117b9321188SToby Isaac char version[256], buildoptions[128] = ""; 118b9321188SToby Isaac PetscMPIInt size; 119b9321188SToby Isaac size_t len; 120b9321188SToby Isaac 121b9321188SToby Isaac PetscFunctionBegin; 122b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 123b9321188SToby Isaac PetscCall(PetscGetArchType(arch, sizeof(arch))); 124b9321188SToby Isaac PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 125b9321188SToby Isaac PetscCall(PetscGetUserName(username, sizeof(username))); 126b9321188SToby Isaac PetscCall(PetscGetProgramName(pname, sizeof(pname))); 127b9321188SToby Isaac PetscCall(PetscGetDate(date, sizeof(date))); 128b9321188SToby Isaac PetscCall(PetscGetVersion(version, sizeof(version))); 129b9321188SToby Isaac 130b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification")); 131b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "executable", "Executable", pname)); 132b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "architecture", "Architecture", arch)); 133b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "hostname", "Host", hostname)); 134b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "nprocesses", "Number of processes", size)); 135b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "user", "Run by user", username)); 136b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "date", "Started at", date)); 137f0b74427SPierre Jolivet PetscCall(PetscViewerXMLPutString(viewer, "petscrelease", "PETSc Release", version)); 138b9321188SToby Isaac 139b9321188SToby Isaac if (PetscDefined(USE_DEBUG)) PetscCall(PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions))); 140b9321188SToby Isaac if (PetscDefined(USE_COMPLEX)) PetscCall(PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions))); 141b9321188SToby Isaac if (PetscDefined(USE_REAL_SINGLE)) { 142b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions))); 143b9321188SToby Isaac } else if (PetscDefined(USE_REAL___FLOAT128)) { 144b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions))); 145b9321188SToby Isaac } else if (PetscDefined(USE_REAL___FP16)) { 146b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions))); 147b9321188SToby Isaac } 148b9321188SToby Isaac if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions))); 149b9321188SToby Isaac #if defined(__cplusplus) 150b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions))); 151b9321188SToby Isaac #endif 152b9321188SToby Isaac PetscCall(PetscStrlen(buildoptions, &len)); 153f0b74427SPierre Jolivet if (len) PetscCall(PetscViewerXMLPutString(viewer, "petscbuildoptions", "PETSc build options", buildoptions)); 154b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "runspecification")); 155b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 156b9321188SToby Isaac } 157b9321188SToby Isaac 158b9321188SToby Isaac static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total) 159b9321188SToby Isaac { 160b9321188SToby Isaac PetscLogDouble min, tot, ratio, avg; 161b9321188SToby Isaac MPI_Comm comm; 162b9321188SToby Isaac PetscMPIInt rank, size; 163b9321188SToby Isaac PetscLogDouble valrank[2], max[2]; 164b9321188SToby Isaac 165b9321188SToby Isaac PetscFunctionBegin; 166b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 167b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 168b9321188SToby Isaac PetscCallMPI(MPI_Comm_rank(comm, &rank)); 169b9321188SToby Isaac 170b9321188SToby Isaac valrank[0] = local_val; 171b9321188SToby Isaac valrank[1] = (PetscLogDouble)rank; 172462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 173462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm)); 174462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 175b9321188SToby Isaac avg = tot / ((PetscLogDouble)size); 176b9321188SToby Isaac if (min != 0.0) ratio = max[0] / min; 177b9321188SToby Isaac else ratio = 0.0; 178b9321188SToby Isaac 179b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, name, desc)); 180b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "max", max[0], "%e")); 181b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "maxrank", "rank at which max was found", (PetscMPIInt)max[1])); 182b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "ratio", ratio, "%f")); 183b9321188SToby Isaac if (print_average) PetscCall(PetscViewerXMLPutDouble(viewer, "average", avg, "%e")); 184b9321188SToby Isaac if (print_total) PetscCall(PetscViewerXMLPutDouble(viewer, "total", tot, "%e")); 185b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, name)); 186b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 187b9321188SToby Isaac } 188b9321188SToby Isaac 189b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of 190b9321188SToby Isaac * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions. 191b9321188SToby Isaac */ 192b9321188SToby Isaac static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime, PetscLogHandler default_handler) 193b9321188SToby Isaac { 194b9321188SToby Isaac PetscLogDouble flops, mem, red, mess; 195b9321188SToby Isaac PetscInt num_objects; 196b9321188SToby Isaac const PetscBool print_total_yes = PETSC_TRUE, print_total_no = PETSC_FALSE, print_average_no = PETSC_FALSE, print_average_yes = PETSC_TRUE; 197b9321188SToby Isaac 198b9321188SToby Isaac PetscFunctionBegin; 199b9321188SToby Isaac /* Must preserve reduction count before we go on */ 200b9321188SToby Isaac red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 201b9321188SToby Isaac 202b9321188SToby Isaac /* Calculate summary information */ 203b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance")); 204b9321188SToby Isaac 205b9321188SToby Isaac /* Time */ 206b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no)); 207b9321188SToby Isaac 208b9321188SToby Isaac /* Objects */ 209dff009beSToby Isaac PetscCall(PetscLogHandlerGetNumObjects(default_handler, &num_objects)); 210b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble)num_objects, print_average_yes, print_total_no)); 211b9321188SToby Isaac 212b9321188SToby Isaac /* Flop */ 213b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops / 1.0E6, print_average_yes, print_total_yes)); 214b9321188SToby Isaac 215b9321188SToby Isaac /* Flop/sec -- Must talk to Barry here */ 216b9321188SToby Isaac if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 217b9321188SToby Isaac else flops = 0.0; 218b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops / 1.0E6, print_average_yes, print_total_yes)); 219b9321188SToby Isaac 220b9321188SToby Isaac /* Memory */ 221b9321188SToby Isaac PetscCall(PetscMallocGetMaximumUsage(&mem)); 222b9321188SToby Isaac if (mem > 0.0) PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem / 1024.0 / 1024.0, print_average_yes, print_total_yes)); 223b9321188SToby Isaac /* Messages */ 224b9321188SToby Isaac mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 225b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes)); 226b9321188SToby Isaac 227b9321188SToby Isaac /* Message Volume */ 228b9321188SToby Isaac mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 229b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess / 1024.0 / 1024.0, print_average_yes, print_total_yes)); 230b9321188SToby Isaac 231b9321188SToby Isaac /* Reductions */ 232b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red, print_average_no, print_total_no)); 233b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "globalperformance")); 234b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 235b9321188SToby Isaac } 236b9321188SToby Isaac 237b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of 238b9321188SToby Isaac * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions. 239b9321188SToby Isaac */ 240b9321188SToby Isaac static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble value, PetscLogDouble minthreshold, PetscLogDouble maxthreshold, PetscLogDouble minmaxtreshold) 241b9321188SToby Isaac { 242b9321188SToby Isaac MPI_Comm comm; /* MPI communicator in reduction */ 243b9321188SToby Isaac PetscMPIInt rank; /* rank of this process */ 244b9321188SToby Isaac PetscLogDouble val_in[2], max[2], min[2]; 245b9321188SToby Isaac PetscLogDouble minvalue, maxvalue, tot; 246b9321188SToby Isaac PetscMPIInt size; 247b9321188SToby Isaac PetscMPIInt minLoc, maxLoc; 248b9321188SToby Isaac 249b9321188SToby Isaac PetscFunctionBegin; 250b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 251b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(comm, &size)); 252b9321188SToby Isaac PetscCallMPI(MPI_Comm_rank(comm, &rank)); 253b9321188SToby Isaac val_in[0] = value; 254b9321188SToby Isaac val_in[1] = (PetscLogDouble)rank; 255462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm)); 256462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm)); 257b9321188SToby Isaac maxvalue = max[0]; 258b9321188SToby Isaac maxLoc = (PetscMPIInt)max[1]; 259b9321188SToby Isaac minvalue = min[0]; 260b9321188SToby Isaac minLoc = (PetscMPIInt)min[1]; 261462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 262b9321188SToby Isaac 263b9321188SToby Isaac if (maxvalue < maxthreshold && minvalue >= minthreshold) { 264b9321188SToby Isaac /* One call per parent or NO value: don't print */ 265b9321188SToby Isaac } else { 266b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, name, NULL)); 267b9321188SToby Isaac if (maxvalue > minvalue * minmaxtreshold) { 268b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "avgvalue", tot / size, "%g")); 269b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "minvalue", minvalue, "%g")); 270b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "maxvalue", maxvalue, "%g")); 271b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "minloc", NULL, minLoc)); 272b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "maxloc", NULL, maxLoc)); 273b9321188SToby Isaac } else { 274b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "value", tot / size, "%g")); 275b9321188SToby Isaac } 276b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, name)); 277b9321188SToby Isaac } 278b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 279b9321188SToby Isaac } 280b9321188SToby Isaac 281b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer, const PetscEventPerfInfo *perfInfo, int childCount, int parentCount, const char *name, PetscLogDouble totalTime) 282b9321188SToby Isaac { 283b9321188SToby Isaac PetscLogDouble time = perfInfo->time; 284b9321188SToby Isaac PetscLogDouble timeMx; 285b9321188SToby Isaac MPI_Comm comm; 286b9321188SToby Isaac 287b9321188SToby Isaac PetscFunctionBegin; 288b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 289462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 290b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "name", NULL, name)); 291b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "time", time / totalTime * 100.0, 0, 0, 1.02)); 292b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount > 0 ? ((PetscLogDouble)childCount) / ((PetscLogDouble)parentCount) : 1.0, 0.99, 1.01, 1.02)); 293b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time >= timeMx * 0.001 ? 1e-6 * perfInfo->flops / time : 0, 0, 0.01, 1.05)); 294b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mbps", time >= timeMx * 0.001 ? perfInfo->messageLength / (1024 * 1024 * time) : 0, 0, 0.01, 1.05)); 295b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time >= timeMx * 0.001 ? perfInfo->numReductions / time : 0, 0, 0.01, 1.05)); 296b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 297b9321188SToby Isaac } 298b9321188SToby Isaac 299b9321188SToby Isaac static PetscErrorCode PetscNestedNameGetBase(const char name[], const char *base[]) 300b9321188SToby Isaac { 301b9321188SToby Isaac size_t n; 3024d86920dSPierre Jolivet 303b9321188SToby Isaac PetscFunctionBegin; 304b9321188SToby Isaac PetscCall(PetscStrlen(name, &n)); 305b9321188SToby Isaac while (n > 0 && name[n - 1] != ';') n--; 306b9321188SToby Isaac *base = &name[n]; 307b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 308b9321188SToby Isaac } 309b9321188SToby Isaac 310b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, double total_time, double threshold_time, const PetscNestedEventNode *parent_node, PetscEventPerfInfo *parent_info, const PetscNestedEventNode tree[], PetscEventPerfInfo perf[], PetscLogNestedType type, PetscBool print_events) 311b9321188SToby Isaac { 312b9321188SToby Isaac PetscInt num_children = 0, num_printed; 313b9321188SToby Isaac PetscInt num_nodes = parent_node->num_descendants; 314b9321188SToby Isaac PetscInt *perm; 315*85d60b32SJunchao Zhang PetscReal *times; // Not PetscLogDouble, to reuse PetscSortRealWithArrayInt() below 316b9321188SToby Isaac PetscEventPerfInfo other; 317b9321188SToby Isaac 318b9321188SToby Isaac PetscFunctionBegin; 319b9321188SToby Isaac for (PetscInt node = 0; node < num_nodes; node += 1 + tree[node].num_descendants) { 320b9321188SToby Isaac PetscAssert(tree[node].parent == parent_node->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed tree invariant"); 321b9321188SToby Isaac num_children++; 322b9321188SToby Isaac } 323b9321188SToby Isaac num_printed = num_children; 324b9321188SToby Isaac PetscCall(PetscMemzero(&other, sizeof(other))); 325b9321188SToby Isaac PetscCall(PetscMalloc2(num_children + 2, ×, num_children + 2, &perm)); 326b9321188SToby Isaac for (PetscInt i = 0, node = 0; node < num_nodes; i++, node += 1 + tree[node].num_descendants) { 327b9321188SToby Isaac PetscLogDouble child_time = perf[node].time; 328b9321188SToby Isaac 329b9321188SToby Isaac perm[i] = node; 330*85d60b32SJunchao Zhang PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &child_time, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)viewer))); 331b9321188SToby Isaac times[i] = -child_time; 332b9321188SToby Isaac 333b9321188SToby Isaac parent_info->time -= perf[node].time; 334b9321188SToby Isaac parent_info->flops -= perf[node].flops; 335b9321188SToby Isaac parent_info->numMessages -= perf[node].numMessages; 336b9321188SToby Isaac parent_info->messageLength -= perf[node].messageLength; 337b9321188SToby Isaac parent_info->numReductions -= perf[node].numReductions; 338b9321188SToby Isaac if (child_time < threshold_time) { 339b9321188SToby Isaac PetscEventPerfInfo *add_to = (type == PETSC_LOG_NESTED_XML) ? &other : parent_info; 340b9321188SToby Isaac 341b9321188SToby Isaac add_to->time += perf[node].time; 342b9321188SToby Isaac add_to->flops += perf[node].flops; 343b9321188SToby Isaac add_to->numMessages += perf[node].numMessages; 344b9321188SToby Isaac add_to->messageLength += perf[node].messageLength; 345b9321188SToby Isaac add_to->numReductions += perf[node].numReductions; 346b9321188SToby Isaac add_to->count += perf[node].count; 347b9321188SToby Isaac num_printed--; 348b9321188SToby Isaac } 349b9321188SToby Isaac } 350b9321188SToby Isaac perm[num_children] = -1; 351b9321188SToby Isaac times[num_children] = -parent_info->time; 352b9321188SToby Isaac perm[num_children + 1] = -2; 353b9321188SToby Isaac times[num_children + 1] = -other.time; 354*85d60b32SJunchao Zhang PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, ×[num_children], 2, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)viewer))); 355cc201d5cSJunchao Zhang // Sync the time with allreduce results, otherwise it could result in code path divergence through num_printed and early return. 356cc201d5cSJunchao Zhang other.time = -times[num_children + 1]; 357b9321188SToby Isaac if (type == PETSC_LOG_NESTED_FLAMEGRAPH) { 358b9321188SToby Isaac /* The output is given as an integer in microseconds because otherwise the file cannot be read 359b9321188SToby Isaac * by apps such as speedscope (https://speedscope.app/). */ 360b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%s %" PetscInt64_FMT "\n", parent_node->name, (PetscInt64)(-times[num_children] * 1e6))); 361b9321188SToby Isaac } 362b9321188SToby Isaac if (other.time > 0.0) num_printed++; 363b9321188SToby Isaac // number of items other than "self" that will be printed 364b9321188SToby Isaac if (num_printed == 0) { 365b9321188SToby Isaac PetscCall(PetscFree2(times, perm)); 366b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 367b9321188SToby Isaac } 368b9321188SToby Isaac // sort descending by time 369b9321188SToby Isaac PetscCall(PetscSortRealWithArrayInt(num_children + 2, times, perm)); 370b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLStartSection(viewer, "events", NULL)); 371b9321188SToby Isaac for (PetscInt i = 0; i < num_children + 2; i++) { 372b9321188SToby Isaac PetscInt node = perm[i]; 373b9321188SToby Isaac PetscLogDouble child_time = -times[i]; 374b9321188SToby Isaac 375b9321188SToby Isaac if (child_time >= threshold_time || (node < 0 && child_time > 0.0)) { 376b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) { 377b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "event", NULL)); 378b9321188SToby Isaac if (node == -1) { 379b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, parent_info, 0, 0, "self", total_time)); 380b9321188SToby Isaac } else if (node == -2) { 381b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, &other, other.count, parent_info->count, "other", total_time)); 382b9321188SToby Isaac } else { 383b9321188SToby Isaac const char *base_name = NULL; 384b9321188SToby Isaac PetscCall(PetscNestedNameGetBase(tree[node].name, &base_name)); 385b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, &perf[node], perf[node].count, parent_info->count, base_name, total_time)); 386b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_TRUE)); 387b9321188SToby Isaac } 388b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "event")); 389b9321188SToby Isaac } else if (node >= 0) { 390b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_FALSE)); 391b9321188SToby Isaac } 392b9321188SToby Isaac } 393b9321188SToby Isaac } 394b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLEndSection(viewer, "events")); 395b9321188SToby Isaac PetscCall(PetscFree2(times, perm)); 396b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 397b9321188SToby Isaac } 398b9321188SToby Isaac 399b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, PetscLogDouble threshold, PetscLogNestedType type) 400b9321188SToby Isaac { 401b9321188SToby Isaac PetscNestedEventNode *main_stage; 402b9321188SToby Isaac PetscNestedEventNode *tree_rem; 403b9321188SToby Isaac PetscEventPerfInfo *main_stage_perf; 404b9321188SToby Isaac PetscEventPerfInfo *perf_rem; 405b9321188SToby Isaac PetscLogDouble time; 406b9321188SToby Isaac PetscLogDouble threshold_time; 407b9321188SToby Isaac 408b9321188SToby Isaac PetscFunctionBegin; 409b9321188SToby Isaac main_stage = &tree->nodes[0]; 410b9321188SToby Isaac tree_rem = &tree->nodes[1]; 411b9321188SToby Isaac main_stage_perf = &tree->perf[0]; 412b9321188SToby Isaac perf_rem = &tree->perf[1]; 413b9321188SToby Isaac time = main_stage_perf->time; 414*85d60b32SJunchao Zhang PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &time, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, tree->comm)); 415b9321188SToby Isaac /* Print (or ignore) the children in ascending order of total time */ 416b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) { 417b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "timertree", "Timings tree")); 418b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", time, "%f")); 419b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "timethreshold", threshold, "%f")); 420b9321188SToby Isaac } 421b9321188SToby Isaac threshold_time = time * (threshold / 100.0 + 1.e-12); 422b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, time, threshold_time, main_stage, main_stage_perf, tree_rem, perf_rem, type, PETSC_FALSE)); 423b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) PetscCall(PetscViewerXMLEndSection(viewer, "timertree")); 424b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 425b9321188SToby Isaac } 426b9321188SToby Isaac 427b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_XML(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer) 428b9321188SToby Isaac { 429b9321188SToby Isaac PetscFunctionBegin; 430b9321188SToby Isaac PetscCall(PetscViewerInitASCII_XML(viewer)); 431b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n")); 432b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "petscroot", NULL)); 433b9321188SToby Isaac 434b9321188SToby Isaac // Print global information about this run 435b9321188SToby Isaac PetscCall(PetscPrintExeSpecs(viewer)); 436b9321188SToby Isaac 4372611ad71SToby Isaac if (PetscDefined(USE_LOG)) { 438b9321188SToby Isaac PetscEventPerfInfo *main_stage_info; 439b9321188SToby Isaac PetscLogDouble locTotalTime; 440b9321188SToby Isaac 441dff009beSToby Isaac PetscCall(PetscLogHandlerGetEventPerfInfo(nested->handler, 0, 0, &main_stage_info)); 442b9321188SToby Isaac locTotalTime = main_stage_info->time; 443b9321188SToby Isaac PetscCall(PetscPrintGlobalPerformance(viewer, locTotalTime, nested->handler)); 444b9321188SToby Isaac } 445b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_XML)); 446b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "petscroot")); 447b9321188SToby Isaac PetscCall(PetscViewerFinalASCII_XML(viewer)); 448b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 449b9321188SToby Isaac } 450b9321188SToby Isaac 451b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_Flamegraph(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer) 452b9321188SToby Isaac { 453b9321188SToby Isaac PetscFunctionBegin; 454b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_FLAMEGRAPH)); 455b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 456b9321188SToby Isaac } 457