1*b9321188SToby Isaac /************************************************************************************* 2*b9321188SToby Isaac * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S * 3*b9321188SToby Isaac ************************************************************************************* 4*b9321188SToby Isaac * authors: Koos Huijssen, Christiaan M. Klaij * 5*b9321188SToby Isaac ************************************************************************************* 6*b9321188SToby Isaac * content: Viewer for writing XML output * 7*b9321188SToby Isaac *************************************************************************************/ 8*b9321188SToby Isaac #include <petscviewer.h> 9*b9321188SToby Isaac #include <petsc/private/logimpl.h> 10*b9321188SToby Isaac #include <petsc/private/fortranimpl.h> 11*b9321188SToby Isaac #include "xmlviewer.h" 12*b9321188SToby Isaac #include "lognested.h" 13*b9321188SToby Isaac 14*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLStartSection(PetscViewer viewer, const char *name, const char *desc) 15*b9321188SToby Isaac { 16*b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 17*b9321188SToby Isaac int XMLSectionDepth; 18*b9321188SToby Isaac 19*b9321188SToby Isaac PetscFunctionBegin; 20*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 21*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 22*b9321188SToby Isaac if (!desc) { 23*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>\n", 2 * XMLSectionDepth, "", name)); 24*b9321188SToby Isaac } else { 25*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">\n", 2 * XMLSectionDepth, "", name, desc)); 26*b9321188SToby Isaac } 27*b9321188SToby Isaac PetscCall(PetscViewerASCIIPushTab(viewer)); 28*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 29*b9321188SToby Isaac } 30*b9321188SToby Isaac 31*b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */ 32*b9321188SToby Isaac static PetscErrorCode PetscViewerInitASCII_XML(PetscViewer viewer) 33*b9321188SToby Isaac { 34*b9321188SToby Isaac MPI_Comm comm; 35*b9321188SToby Isaac char PerfScript[PETSC_MAX_PATH_LEN + 40]; 36*b9321188SToby Isaac 37*b9321188SToby Isaac PetscFunctionBegin; 38*b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 39*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")); 40*b9321188SToby Isaac PetscCall(PetscStrreplace(comm, "<?xml-stylesheet type=\"text/xsl\" href=\"performance_xml2html.xsl\"?>", PerfScript, sizeof(PerfScript))); 41*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", PerfScript)); 42*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "root", NULL)); 43*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 44*b9321188SToby Isaac } 45*b9321188SToby Isaac 46*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLEndSection(PetscViewer viewer, const char *name) 47*b9321188SToby Isaac { 48*b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 49*b9321188SToby Isaac int XMLSectionDepth; 50*b9321188SToby Isaac 51*b9321188SToby Isaac PetscFunctionBegin; 52*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 53*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 54*b9321188SToby Isaac if (XMLSectionDepth > 0) PetscCall(PetscViewerASCIIPopTab(viewer)); 55*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 56*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 57*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s</%s>\n", 2 * XMLSectionDepth, "", name)); 58*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 59*b9321188SToby Isaac } 60*b9321188SToby Isaac 61*b9321188SToby Isaac /* Initialize a viewer to XML, and initialize the XMLDepth static parameter */ 62*b9321188SToby Isaac static PetscErrorCode PetscViewerFinalASCII_XML(PetscViewer viewer) 63*b9321188SToby Isaac { 64*b9321188SToby Isaac PetscFunctionBegin; 65*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "root")); 66*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 67*b9321188SToby Isaac } 68*b9321188SToby Isaac 69*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutString(PetscViewer viewer, const char *name, const char *desc, const char *value) 70*b9321188SToby Isaac { 71*b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 72*b9321188SToby Isaac int XMLSectionDepth; 73*b9321188SToby Isaac 74*b9321188SToby Isaac PetscFunctionBegin; 75*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 76*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 77*b9321188SToby Isaac if (!desc) { 78*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, value, name)); 79*b9321188SToby Isaac } else { 80*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%s</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name)); 81*b9321188SToby Isaac } 82*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 83*b9321188SToby Isaac } 84*b9321188SToby Isaac 85*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutInt(PetscViewer viewer, const char *name, const char *desc, int value) 86*b9321188SToby Isaac { 87*b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 88*b9321188SToby Isaac int XMLSectionDepth; 89*b9321188SToby Isaac 90*b9321188SToby Isaac PetscFunctionBegin; 91*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 92*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 93*b9321188SToby Isaac if (!desc) { 94*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%d</%s>\n", 2 * XMLSectionDepth, "", name, value, name)); 95*b9321188SToby Isaac } else { 96*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%d</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name)); 97*b9321188SToby Isaac } 98*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 99*b9321188SToby Isaac } 100*b9321188SToby Isaac 101*b9321188SToby Isaac static PetscErrorCode PetscViewerXMLPutDouble(PetscViewer viewer, const char *name, PetscLogDouble value, const char *format) 102*b9321188SToby Isaac { 103*b9321188SToby Isaac PetscInt XMLSectionDepthPetsc; 104*b9321188SToby Isaac int XMLSectionDepth; 105*b9321188SToby Isaac char buffer[1024]; 106*b9321188SToby Isaac 107*b9321188SToby Isaac PetscFunctionBegin; 108*b9321188SToby Isaac PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc)); 109*b9321188SToby Isaac XMLSectionDepth = (int)XMLSectionDepthPetsc; 110*b9321188SToby Isaac PetscCall(PetscSNPrintf(buffer, sizeof(buffer), "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, format, name)); 111*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, buffer, value)); 112*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 113*b9321188SToby Isaac } 114*b9321188SToby Isaac 115*b9321188SToby Isaac static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer) 116*b9321188SToby Isaac { 117*b9321188SToby Isaac char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 118*b9321188SToby Isaac char version[256], buildoptions[128] = ""; 119*b9321188SToby Isaac PetscMPIInt size; 120*b9321188SToby Isaac size_t len; 121*b9321188SToby Isaac 122*b9321188SToby Isaac PetscFunctionBegin; 123*b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 124*b9321188SToby Isaac PetscCall(PetscGetArchType(arch, sizeof(arch))); 125*b9321188SToby Isaac PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 126*b9321188SToby Isaac PetscCall(PetscGetUserName(username, sizeof(username))); 127*b9321188SToby Isaac PetscCall(PetscGetProgramName(pname, sizeof(pname))); 128*b9321188SToby Isaac PetscCall(PetscGetDate(date, sizeof(date))); 129*b9321188SToby Isaac PetscCall(PetscGetVersion(version, sizeof(version))); 130*b9321188SToby Isaac 131*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification")); 132*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "executable", "Executable", pname)); 133*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "architecture", "Architecture", arch)); 134*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "hostname", "Host", hostname)); 135*b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "nprocesses", "Number of processes", size)); 136*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "user", "Run by user", username)); 137*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "date", "Started at", date)); 138*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "petscrelease", "Petsc Release", version)); 139*b9321188SToby Isaac 140*b9321188SToby Isaac if (PetscDefined(USE_DEBUG)) PetscCall(PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions))); 141*b9321188SToby Isaac if (PetscDefined(USE_COMPLEX)) PetscCall(PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions))); 142*b9321188SToby Isaac if (PetscDefined(USE_REAL_SINGLE)) { 143*b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions))); 144*b9321188SToby Isaac } else if (PetscDefined(USE_REAL___FLOAT128)) { 145*b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions))); 146*b9321188SToby Isaac } else if (PetscDefined(USE_REAL___FP16)) { 147*b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions))); 148*b9321188SToby Isaac } 149*b9321188SToby Isaac if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions))); 150*b9321188SToby Isaac #if defined(__cplusplus) 151*b9321188SToby Isaac PetscCall(PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions))); 152*b9321188SToby Isaac #endif 153*b9321188SToby Isaac PetscCall(PetscStrlen(buildoptions, &len)); 154*b9321188SToby Isaac if (len) PetscCall(PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions)); 155*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "runspecification")); 156*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 157*b9321188SToby Isaac } 158*b9321188SToby Isaac 159*b9321188SToby Isaac #if PetscDefined(USE_LOG) 160*b9321188SToby Isaac static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total) 161*b9321188SToby Isaac { 162*b9321188SToby Isaac PetscLogDouble min, tot, ratio, avg; 163*b9321188SToby Isaac MPI_Comm comm; 164*b9321188SToby Isaac PetscMPIInt rank, size; 165*b9321188SToby Isaac PetscLogDouble valrank[2], max[2]; 166*b9321188SToby Isaac 167*b9321188SToby Isaac PetscFunctionBegin; 168*b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 169*b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 170*b9321188SToby Isaac PetscCallMPI(MPI_Comm_rank(comm, &rank)); 171*b9321188SToby Isaac 172*b9321188SToby Isaac valrank[0] = local_val; 173*b9321188SToby Isaac valrank[1] = (PetscLogDouble)rank; 174*b9321188SToby Isaac PetscCall(MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 175*b9321188SToby Isaac PetscCall(MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm)); 176*b9321188SToby Isaac PetscCall(MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 177*b9321188SToby Isaac avg = tot / ((PetscLogDouble)size); 178*b9321188SToby Isaac if (min != 0.0) ratio = max[0] / min; 179*b9321188SToby Isaac else ratio = 0.0; 180*b9321188SToby Isaac 181*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, name, desc)); 182*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "max", max[0], "%e")); 183*b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "maxrank", "rank at which max was found", (PetscMPIInt)max[1])); 184*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "ratio", ratio, "%f")); 185*b9321188SToby Isaac if (print_average) PetscCall(PetscViewerXMLPutDouble(viewer, "average", avg, "%e")); 186*b9321188SToby Isaac if (print_total) PetscCall(PetscViewerXMLPutDouble(viewer, "total", tot, "%e")); 187*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, name)); 188*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 189*b9321188SToby Isaac } 190*b9321188SToby Isaac 191*b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of 192*b9321188SToby Isaac * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions. 193*b9321188SToby Isaac */ 194*b9321188SToby Isaac static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime, PetscLogHandler default_handler) 195*b9321188SToby Isaac { 196*b9321188SToby Isaac PetscLogDouble flops, mem, red, mess; 197*b9321188SToby Isaac PetscInt num_objects; 198*b9321188SToby Isaac const PetscBool print_total_yes = PETSC_TRUE, print_total_no = PETSC_FALSE, print_average_no = PETSC_FALSE, print_average_yes = PETSC_TRUE; 199*b9321188SToby Isaac 200*b9321188SToby Isaac PetscFunctionBegin; 201*b9321188SToby Isaac /* Must preserve reduction count before we go on */ 202*b9321188SToby Isaac red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 203*b9321188SToby Isaac 204*b9321188SToby Isaac /* Calculate summary information */ 205*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance")); 206*b9321188SToby Isaac 207*b9321188SToby Isaac /* Time */ 208*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no)); 209*b9321188SToby Isaac 210*b9321188SToby Isaac /* Objects */ 211*b9321188SToby Isaac PetscCall(PetscLogHandlerDefaultGetNumObjects(default_handler, &num_objects)); 212*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble)num_objects, print_average_yes, print_total_no)); 213*b9321188SToby Isaac 214*b9321188SToby Isaac /* Flop */ 215*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops / 1.0E6, print_average_yes, print_total_yes)); 216*b9321188SToby Isaac 217*b9321188SToby Isaac /* Flop/sec -- Must talk to Barry here */ 218*b9321188SToby Isaac if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 219*b9321188SToby Isaac else flops = 0.0; 220*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops / 1.0E6, print_average_yes, print_total_yes)); 221*b9321188SToby Isaac 222*b9321188SToby Isaac /* Memory */ 223*b9321188SToby Isaac PetscCall(PetscMallocGetMaximumUsage(&mem)); 224*b9321188SToby Isaac if (mem > 0.0) PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem / 1024.0 / 1024.0, print_average_yes, print_total_yes)); 225*b9321188SToby Isaac /* Messages */ 226*b9321188SToby Isaac mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 227*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes)); 228*b9321188SToby Isaac 229*b9321188SToby Isaac /* Message Volume */ 230*b9321188SToby Isaac mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 231*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess / 1024.0 / 1024.0, print_average_yes, print_total_yes)); 232*b9321188SToby Isaac 233*b9321188SToby Isaac /* Reductions */ 234*b9321188SToby Isaac PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red, print_average_no, print_total_no)); 235*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "globalperformance")); 236*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 237*b9321188SToby Isaac } 238*b9321188SToby Isaac #endif 239*b9321188SToby Isaac 240*b9321188SToby Isaac /* Print the global performance: max, max/min, average and total of 241*b9321188SToby Isaac * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions. 242*b9321188SToby Isaac */ 243*b9321188SToby Isaac static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble value, PetscLogDouble minthreshold, PetscLogDouble maxthreshold, PetscLogDouble minmaxtreshold) 244*b9321188SToby Isaac { 245*b9321188SToby Isaac MPI_Comm comm; /* MPI communicator in reduction */ 246*b9321188SToby Isaac PetscMPIInt rank; /* rank of this process */ 247*b9321188SToby Isaac PetscLogDouble val_in[2], max[2], min[2]; 248*b9321188SToby Isaac PetscLogDouble minvalue, maxvalue, tot; 249*b9321188SToby Isaac PetscMPIInt size; 250*b9321188SToby Isaac PetscMPIInt minLoc, maxLoc; 251*b9321188SToby Isaac 252*b9321188SToby Isaac PetscFunctionBegin; 253*b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 254*b9321188SToby Isaac PetscCallMPI(MPI_Comm_size(comm, &size)); 255*b9321188SToby Isaac PetscCallMPI(MPI_Comm_rank(comm, &rank)); 256*b9321188SToby Isaac val_in[0] = value; 257*b9321188SToby Isaac val_in[1] = (PetscLogDouble)rank; 258*b9321188SToby Isaac PetscCall(MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm)); 259*b9321188SToby Isaac PetscCall(MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm)); 260*b9321188SToby Isaac maxvalue = max[0]; 261*b9321188SToby Isaac maxLoc = (PetscMPIInt)max[1]; 262*b9321188SToby Isaac minvalue = min[0]; 263*b9321188SToby Isaac minLoc = (PetscMPIInt)min[1]; 264*b9321188SToby Isaac PetscCall(MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 265*b9321188SToby Isaac 266*b9321188SToby Isaac if (maxvalue < maxthreshold && minvalue >= minthreshold) { 267*b9321188SToby Isaac /* One call per parent or NO value: don't print */ 268*b9321188SToby Isaac } else { 269*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, name, NULL)); 270*b9321188SToby Isaac if (maxvalue > minvalue * minmaxtreshold) { 271*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "avgvalue", tot / size, "%g")); 272*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "minvalue", minvalue, "%g")); 273*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "maxvalue", maxvalue, "%g")); 274*b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "minloc", NULL, minLoc)); 275*b9321188SToby Isaac PetscCall(PetscViewerXMLPutInt(viewer, "maxloc", NULL, maxLoc)); 276*b9321188SToby Isaac } else { 277*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "value", tot / size, "%g")); 278*b9321188SToby Isaac } 279*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, name)); 280*b9321188SToby Isaac } 281*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 282*b9321188SToby Isaac } 283*b9321188SToby Isaac 284*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer, const PetscEventPerfInfo *perfInfo, int childCount, int parentCount, const char *name, PetscLogDouble totalTime) 285*b9321188SToby Isaac { 286*b9321188SToby Isaac PetscLogDouble time = perfInfo->time; 287*b9321188SToby Isaac PetscLogDouble timeMx; 288*b9321188SToby Isaac MPI_Comm comm; 289*b9321188SToby Isaac 290*b9321188SToby Isaac PetscFunctionBegin; 291*b9321188SToby Isaac PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 292*b9321188SToby Isaac PetscCall(MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 293*b9321188SToby Isaac PetscCall(PetscViewerXMLPutString(viewer, "name", NULL, name)); 294*b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "time", time / totalTime * 100.0, 0, 0, 1.02)); 295*b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount > 0 ? ((PetscLogDouble)childCount) / ((PetscLogDouble)parentCount) : 1.0, 0.99, 1.01, 1.02)); 296*b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time >= timeMx * 0.001 ? 1e-6 * perfInfo->flops / time : 0, 0, 0.01, 1.05)); 297*b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mbps", time >= timeMx * 0.001 ? perfInfo->messageLength / (1024 * 1024 * time) : 0, 0, 0.01, 1.05)); 298*b9321188SToby Isaac PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time >= timeMx * 0.001 ? perfInfo->numReductions / time : 0, 0, 0.01, 1.05)); 299*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 300*b9321188SToby Isaac } 301*b9321188SToby Isaac 302*b9321188SToby Isaac static PetscErrorCode PetscNestedNameGetBase(const char name[], const char *base[]) 303*b9321188SToby Isaac { 304*b9321188SToby Isaac size_t n; 305*b9321188SToby Isaac PetscFunctionBegin; 306*b9321188SToby Isaac PetscCall(PetscStrlen(name, &n)); 307*b9321188SToby Isaac while (n > 0 && name[n - 1] != ';') n--; 308*b9321188SToby Isaac *base = &name[n]; 309*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 310*b9321188SToby Isaac } 311*b9321188SToby Isaac 312*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, double total_time, double threshold_time, const PetscNestedEventNode *parent_node, PetscEventPerfInfo *parent_info, const PetscNestedEventNode tree[], PetscEventPerfInfo perf[], PetscLogNestedType type, PetscBool print_events) 313*b9321188SToby Isaac { 314*b9321188SToby Isaac PetscInt num_children = 0, num_printed; 315*b9321188SToby Isaac PetscInt num_nodes = parent_node->num_descendants; 316*b9321188SToby Isaac PetscInt *perm; 317*b9321188SToby Isaac PetscReal *times; 318*b9321188SToby Isaac PetscEventPerfInfo other; 319*b9321188SToby Isaac 320*b9321188SToby Isaac PetscFunctionBegin; 321*b9321188SToby Isaac for (PetscInt node = 0; node < num_nodes; node += 1 + tree[node].num_descendants) { 322*b9321188SToby Isaac PetscAssert(tree[node].parent == parent_node->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed tree invariant"); 323*b9321188SToby Isaac num_children++; 324*b9321188SToby Isaac } 325*b9321188SToby Isaac num_printed = num_children; 326*b9321188SToby Isaac PetscCall(PetscMemzero(&other, sizeof(other))); 327*b9321188SToby Isaac PetscCall(PetscMalloc2(num_children + 2, ×, num_children + 2, &perm)); 328*b9321188SToby Isaac for (PetscInt i = 0, node = 0; node < num_nodes; i++, node += 1 + tree[node].num_descendants) { 329*b9321188SToby Isaac PetscLogDouble child_time = perf[node].time; 330*b9321188SToby Isaac 331*b9321188SToby Isaac perm[i] = node; 332*b9321188SToby Isaac PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &child_time, 1, MPI_DOUBLE, MPI_MAX, PetscObjectComm((PetscObject)viewer))); 333*b9321188SToby Isaac times[i] = -child_time; 334*b9321188SToby Isaac 335*b9321188SToby Isaac parent_info->time -= perf[node].time; 336*b9321188SToby Isaac parent_info->flops -= perf[node].flops; 337*b9321188SToby Isaac parent_info->numMessages -= perf[node].numMessages; 338*b9321188SToby Isaac parent_info->messageLength -= perf[node].messageLength; 339*b9321188SToby Isaac parent_info->numReductions -= perf[node].numReductions; 340*b9321188SToby Isaac if (child_time < threshold_time) { 341*b9321188SToby Isaac PetscEventPerfInfo *add_to = (type == PETSC_LOG_NESTED_XML) ? &other : parent_info; 342*b9321188SToby Isaac 343*b9321188SToby Isaac add_to->time += perf[node].time; 344*b9321188SToby Isaac add_to->flops += perf[node].flops; 345*b9321188SToby Isaac add_to->numMessages += perf[node].numMessages; 346*b9321188SToby Isaac add_to->messageLength += perf[node].messageLength; 347*b9321188SToby Isaac add_to->numReductions += perf[node].numReductions; 348*b9321188SToby Isaac add_to->count += perf[node].count; 349*b9321188SToby Isaac num_printed--; 350*b9321188SToby Isaac } 351*b9321188SToby Isaac } 352*b9321188SToby Isaac perm[num_children] = -1; 353*b9321188SToby Isaac times[num_children] = -parent_info->time; 354*b9321188SToby Isaac perm[num_children + 1] = -2; 355*b9321188SToby Isaac times[num_children + 1] = -other.time; 356*b9321188SToby Isaac PetscCall(MPIU_Allreduce(MPI_IN_PLACE, ×[num_children], 2, MPI_DOUBLE, MPI_MIN, PetscObjectComm((PetscObject)viewer))); 357*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_FLAMEGRAPH) { 358*b9321188SToby Isaac /* The output is given as an integer in microseconds because otherwise the file cannot be read 359*b9321188SToby Isaac * by apps such as speedscope (https://speedscope.app/). */ 360*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "%s %" PetscInt64_FMT "\n", parent_node->name, (PetscInt64)(-times[num_children] * 1e6))); 361*b9321188SToby Isaac } 362*b9321188SToby Isaac if (other.time > 0.0) num_printed++; 363*b9321188SToby Isaac // number of items other than "self" that will be printed 364*b9321188SToby Isaac if (num_printed == 0) { 365*b9321188SToby Isaac PetscCall(PetscFree2(times, perm)); 366*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 367*b9321188SToby Isaac } 368*b9321188SToby Isaac // sort descending by time 369*b9321188SToby Isaac PetscCall(PetscSortRealWithArrayInt(num_children + 2, times, perm)); 370*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLStartSection(viewer, "events", NULL)); 371*b9321188SToby Isaac for (PetscInt i = 0; i < num_children + 2; i++) { 372*b9321188SToby Isaac PetscInt node = perm[i]; 373*b9321188SToby Isaac PetscLogDouble child_time = -times[i]; 374*b9321188SToby Isaac 375*b9321188SToby Isaac if (child_time >= threshold_time || (node < 0 && child_time > 0.0)) { 376*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) { 377*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "event", NULL)); 378*b9321188SToby Isaac if (node == -1) { 379*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, parent_info, 0, 0, "self", total_time)); 380*b9321188SToby Isaac } else if (node == -2) { 381*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, &other, other.count, parent_info->count, "other", total_time)); 382*b9321188SToby Isaac } else { 383*b9321188SToby Isaac const char *base_name = NULL; 384*b9321188SToby Isaac PetscCall(PetscNestedNameGetBase(tree[node].name, &base_name)); 385*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintLine(viewer, &perf[node], perf[node].count, parent_info->count, base_name, total_time)); 386*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_TRUE)); 387*b9321188SToby Isaac } 388*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "event")); 389*b9321188SToby Isaac } else if (node >= 0) { 390*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_FALSE)); 391*b9321188SToby Isaac } 392*b9321188SToby Isaac } 393*b9321188SToby Isaac } 394*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLEndSection(viewer, "events")); 395*b9321188SToby Isaac PetscCall(PetscFree2(times, perm)); 396*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 397*b9321188SToby Isaac } 398*b9321188SToby Isaac 399*b9321188SToby Isaac static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, PetscLogDouble threshold, PetscLogNestedType type) 400*b9321188SToby Isaac { 401*b9321188SToby Isaac PetscNestedEventNode *main_stage; 402*b9321188SToby Isaac PetscNestedEventNode *tree_rem; 403*b9321188SToby Isaac PetscEventPerfInfo *main_stage_perf; 404*b9321188SToby Isaac PetscEventPerfInfo *perf_rem; 405*b9321188SToby Isaac PetscLogDouble time; 406*b9321188SToby Isaac PetscLogDouble threshold_time; 407*b9321188SToby Isaac 408*b9321188SToby Isaac PetscFunctionBegin; 409*b9321188SToby Isaac main_stage = &tree->nodes[0]; 410*b9321188SToby Isaac tree_rem = &tree->nodes[1]; 411*b9321188SToby Isaac main_stage_perf = &tree->perf[0]; 412*b9321188SToby Isaac perf_rem = &tree->perf[1]; 413*b9321188SToby Isaac time = main_stage_perf->time; 414*b9321188SToby Isaac PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &time, 1, MPI_DOUBLE, MPI_MAX, tree->comm)); 415*b9321188SToby Isaac /* Print (or ignore) the children in ascending order of total time */ 416*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) { 417*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "timertree", "Timings tree")); 418*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", time, "%f")); 419*b9321188SToby Isaac PetscCall(PetscViewerXMLPutDouble(viewer, "timethreshold", threshold, "%f")); 420*b9321188SToby Isaac } 421*b9321188SToby Isaac threshold_time = time * (threshold / 100.0 + 1.e-12); 422*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrint(viewer, time, threshold_time, main_stage, main_stage_perf, tree_rem, perf_rem, type, PETSC_FALSE)); 423*b9321188SToby Isaac if (type == PETSC_LOG_NESTED_XML) PetscCall(PetscViewerXMLEndSection(viewer, "timertree")); 424*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 425*b9321188SToby Isaac } 426*b9321188SToby Isaac 427*b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_XML(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer) 428*b9321188SToby Isaac { 429*b9321188SToby Isaac PetscFunctionBegin; 430*b9321188SToby Isaac PetscCall(PetscViewerInitASCII_XML(viewer)); 431*b9321188SToby Isaac PetscCall(PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n")); 432*b9321188SToby Isaac PetscCall(PetscViewerXMLStartSection(viewer, "petscroot", NULL)); 433*b9321188SToby Isaac 434*b9321188SToby Isaac // Print global information about this run 435*b9321188SToby Isaac PetscCall(PetscPrintExeSpecs(viewer)); 436*b9321188SToby Isaac 437*b9321188SToby Isaac #if PetscDefined(USE_LOG) 438*b9321188SToby Isaac { 439*b9321188SToby Isaac PetscEventPerfInfo *main_stage_info; 440*b9321188SToby Isaac PetscLogDouble locTotalTime; 441*b9321188SToby Isaac 442*b9321188SToby Isaac PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(nested->handler, 0, 0, &main_stage_info)); 443*b9321188SToby Isaac locTotalTime = main_stage_info->time; 444*b9321188SToby Isaac PetscCall(PetscPrintGlobalPerformance(viewer, locTotalTime, nested->handler)); 445*b9321188SToby Isaac } 446*b9321188SToby Isaac #endif 447*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_XML)); 448*b9321188SToby Isaac PetscCall(PetscViewerXMLEndSection(viewer, "petscroot")); 449*b9321188SToby Isaac PetscCall(PetscViewerFinalASCII_XML(viewer)); 450*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 451*b9321188SToby Isaac } 452*b9321188SToby Isaac 453*b9321188SToby Isaac PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_Flamegraph(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer) 454*b9321188SToby Isaac { 455*b9321188SToby Isaac PetscFunctionBegin; 456*b9321188SToby Isaac PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_FLAMEGRAPH)); 457*b9321188SToby Isaac PetscFunctionReturn(PETSC_SUCCESS); 458*b9321188SToby Isaac } 459