Actual source code: xmllogevent.c
petsc-3.10.5 2019-03-28
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmlviewer.h"
14: #if defined(PETSC_USE_LOG)
16: /*
17: * Support for nested PetscTimers
18: *
19: * PetscTimers keep track of a lot of useful information: Wall clock times,
20: * message passing statistics, flop counts. Information about the nested structure
21: * of the timers is lost. Example:
22: *
23: * 7:30 Start: awake
24: * 7:30 Start: morning routine
25: * 7:40 Start: eat
26: * 7:49 Done: eat
27: * 7:43 Done: morning routine
28: * 8:15 Start: work
29: * 12:15 Start: eat
30: * 12:45 Done: eat
31: * 16:00 Done: work
32: * 16:30 Start: evening routine
33: * 18:30 Start: eat
34: * 19:15 Done: eat
35: * 22:00 Done: evening routine
36: * 22:00 Done: awake
37: *
38: * Petsc timers provide the following timer results:
39: *
40: * awake: 1 call 14:30 hours
41: * morning routine: 1 call 0:13 hours
42: * eat: 3 calls 1:24 hours
43: * work: 1 call 7:45 hours
44: * evening routine 1 call 5:30 hours
45: *
46: * Nested timers can be used to get the following table:
47: *
48: * [1 call]: awake 14:30 hours
49: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
50: * [1 call]: eat 0:09 hours (69 % of morning routine)
51: * rest (morning routine) 0:04 hours (31 % of morning routine)
52: * [1 call]: work 7:45 hours (53 % of awake)
53: * [1 call]: eat 0:30 hours ( 6 % of work)
54: * rest (work) 7:15 hours (94 % of work)
55: * [1 call]: evening routine 5:30 hours (38 % of awake)
56: * [1 call]: eat 0:45 hours (14 % of evening routine)
57: * rest (evening routine) 4:45 hours (86 % of morning routine)
58: *
59: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
60: * the nested timers make the stages unnecessary.
61: *
62: */
64: /*
65: * Data structures for keeping track of nested timers:
66: *
67: * nestedEvents: information about the timers that have actually been activated
68: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
69: *
70: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
71: * (one or more) default timers, where one of the default timers has the same event-id as the
72: * nested one.
73: *
74: * Because of the risk of confusion between nested timer ids and default timer ids, we
75: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
76: * only for default events. Also, all nested event variables are prepended with 'nst', and
77: * default timers with 'dft'.
78: */
80: #define DFT_ID_AWAKE -1
82: typedef PetscLogEvent NestedEventId;
83: typedef struct {
84: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
85: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
86: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
87: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
89: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
90: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
91: } PetscNestedEvent;
93: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
94: static int nNestedEvents = 0;
95: static int nNestedEventsAllocated = 0;
96: static PetscNestedEvent *nestedEvents = NULL;
97: static PetscLogDouble thresholdTime = 0.01; /* initial value was 0.1 */
99: #define THRESHOLD (thresholdTime/100.0+1e-12)
101: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
102: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: PETSC_INTERN PetscErrorCode PetscLogView_Nested(PetscViewer);
106: /*@C
107: PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
108: rates and object creation and should not slow programs down too much.
110: Logically Collective over PETSC_COMM_WORLD
112: Options Database Keys:
113: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
115: Usage:
116: .vb
117: PetscInitialize(...);
118: PetscLogNestedBegin();
119: ... code ...
120: PetscLogView(viewer);
121: PetscFinalize();
122: .ve
124: Level: advanced
126: .keywords: log, begin
127: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin()
128: @*/
129: PetscErrorCode PetscLogNestedBegin(void)
130: {
133: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
135: nNestedEventsAllocated = 10;
136: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
139: dftParentActive = DFT_ID_AWAKE;
140: nNestedEvents =1;
142: /* 'Awake' is nested event 0. It has no parents */
143: nestedEvents[0].nstEvent = 0;
144: nestedEvents[0].nParents = 0;
145: nestedEvents[0].dftParentsSorted = NULL;
146: nestedEvents[0].dftEvents = NULL;
147: nestedEvents[0].dftParents = NULL;
148: nestedEvents[0].dftEventsSorted = NULL;
150: PetscLogSet(PetscLogEventBeginNested,PetscLogEventEndNested);
151: return(0);
152: }
154: /* Delete the data structures for the nested timers */
155: PetscErrorCode PetscLogNestedEnd(void)
156: {
158: int i;
160: if (!nestedEvents) return(0);
161: for (i=0; i<nNestedEvents; i++) {
162: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
163: }
164: PetscFree(nestedEvents);
165: nestedEvents = NULL;
166: nNestedEvents = 0;
167: nNestedEventsAllocated = 0;
168: return(0);
169: }
172: /*
173: * UTILITIES: FIND STUFF IN SORTED ARRAYS
174: *
175: * Utility: find a default timer in a sorted array */
176: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
177: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
178: int narray, /* dimension of dftArray */
179: int *entry) /* entry in the array where dftIndex may be found;
180: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
181: * In that case, the dftIndex can be inserted at this entry. */
182: {
184: if (narray==0 || dftIndex <= dftArray[0]) {
185: *entry = 0;
186: } else if (dftIndex > dftArray[narray-1]) {
187: *entry = narray;
188: } else {
189: int ihigh=narray-1, ilow=0;
190: while (ihigh>ilow) {
191: const int imiddle = (ihigh+ilow)/2;
192: if (dftArray[imiddle] > dftIndex) {
193: ihigh=imiddle;
194: } else if (dftArray[imiddle]<dftIndex) {
195: ilow =imiddle+1;
196: } else {
197: ihigh=imiddle;
198: ilow =imiddle;
199: }
200: }
201: *entry = ihigh;
202: }
203: return(0);
204: }
206: /* Utility: find the nested event with given identification */
207: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
208: int *entry) /* entry in the nestedEvents where nstEvent may be found;
209: * if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray */
210: {
213: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
214: *entry = 0;
215: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
216: *entry = nNestedEvents;
217: } else {
218: int ihigh=nNestedEvents-1, ilow=0;
219: while (ihigh>ilow) {
220: const int imiddle = (ihigh+ilow)/2;
221: if (nestedEvents[imiddle].nstEvent > nstEvent) {
222: ihigh=imiddle;
223: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
224: ilow =imiddle+1;
225: } else {
226: ihigh=imiddle;
227: ilow =imiddle;
228: }
229: }
230: *entry = ihigh;
231: }
232: return(0);
233: }
235: /*
236: Nested logging is not prepared yet to support user-defined logging stages, so for now we force logging on the main stage.
237: Using PetscLogStage{Push/Pop}() would be more appropriate, but these two calls do extra bookkeeping work we don't need.
238: */
240: #define MAINSTAGE 0
242: static PetscLogStage savedStage = 0;
244: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageOverride(void)
245: {
246: PetscStageLog stageLog = petsc_stageLog;
250: if (stageLog->curStage == MAINSTAGE) return(0);
251: savedStage = stageLog->curStage;
252: stageLog->curStage = MAINSTAGE;
253: PetscIntStackPush(stageLog->stack, MAINSTAGE);
254: return(0);
255: }
257: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageRestore(void)
258: {
259: PetscStageLog stageLog = petsc_stageLog;
263: if (savedStage == MAINSTAGE) return(0);
264: stageLog->curStage = savedStage;
265: PetscIntStackPop(stageLog->stack, &savedStage);
266: return(0);
267: }
269: /******************************************************************************************/
270: /* Start a nested event */
271: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
272: {
273: PetscErrorCode ierr;
274: int entry, pentry, tentry,i;
275: PetscLogEvent dftEvent;
278: PetscLogEventFindNestedTimer(nstEvent, &entry);
279: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
280: /* Nested event doesn't exist yet: create it */
282: if (nNestedEvents==nNestedEventsAllocated) {
283: /* Enlarge and re-allocate nestedEvents if needed */
284: PetscNestedEvent *tmp = nestedEvents;
285: PetscMalloc1(2*nNestedEvents,&nestedEvents);
286: nNestedEventsAllocated*=2;
287: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
288: PetscFree(tmp);
289: }
291: /* Clear space in nestedEvents for new nested event */
292: nNestedEvents++;
293: for (i = nNestedEvents-1; i>entry; i--) {
294: nestedEvents[i] = nestedEvents[i-1];
295: }
297: /* Create event in nestedEvents */
298: nestedEvents[entry].nstEvent = nstEvent;
299: nestedEvents[entry].nParents=1;
300: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
302: /* Fill in new event */
303: pentry = 0;
304: dftEvent = (PetscLogEvent) nstEvent;
306: nestedEvents[entry].nstEvent = nstEvent;
307: nestedEvents[entry].dftParents[pentry] = dftParentActive;
308: nestedEvents[entry].dftEvents[pentry] = dftEvent;
309: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
310: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
312: } else {
313: /* Nested event exists: find current dftParentActive among parents */
314: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
315: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
316: int nParents = nestedEvents[entry].nParents;
318: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
320: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
321: /* dftParentActive not in the list: add it to the list */
322: int i;
323: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
324: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
325: char name[100];
327: /* Register a new default timer */
328: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
329: PetscLogEventRegister(name, 0, &dftEvent);
330: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
332: /* Reallocate parents and dftEvents to make space for new parent */
333: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
334: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
335: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
336: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
337: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
338: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
340: dftParents = nestedEvents[entry].dftParents;
341: dftEvents = nestedEvents[entry].dftEvents;
342: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
343: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
345: nestedEvents[entry].nParents++;
346: nParents++;
348: for (i = nParents-1; i>pentry; i--) {
349: dftParentsSorted[i] = dftParentsSorted[i-1];
350: dftEvents[i] = dftEvents[i-1];
351: }
352: for (i = nParents-1; i>tentry; i--) {
353: dftParents[i] = dftParents[i-1];
354: dftEventsSorted[i] = dftEventsSorted[i-1];
355: }
357: /* Fill in the new default timer */
358: dftParentsSorted[pentry] = dftParentActive;
359: dftEvents[pentry] = dftEvent;
360: dftParents[tentry] = dftParentActive;
361: dftEventsSorted[tentry] = dftEvent;
363: } else {
364: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
365: dftEvent = nestedEvents[entry].dftEvents[pentry];
366: }
367: }
369: /* Start the default 'dftEvent'-timer and update the dftParentActive */
370: PetscLogStageOverride();
371: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
372: PetscLogStageRestore();
373: dftParentActive = dftEvent;
374: return(0);
375: }
377: /* End a nested event */
378: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
379: {
380: PetscErrorCode ierr;
381: int entry, pentry, nParents;
382: PetscLogEvent *dftEventsSorted;
385: /* Find the nested event */
386: PetscLogEventFindNestedTimer(nstEvent, &entry);
387: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
388: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
389: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
390: nParents = nestedEvents[entry].nParents;
392: /* Find the current default timer among the 'dftEvents' of this event */
393: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
395: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
396: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
398: /* Stop the default timer and update the dftParentActive */
399: PetscLogStageOverride();
400: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
401: PetscLogStageRestore();
402: dftParentActive = nestedEvents[entry].dftParents[pentry];
403: return(0);
404: }
406: /*@
407: PetscLogSetThreshold - Set the threshold time for logging the events; this is a percentage out of 100, so 1. means any event
408: that takes 1 or more percent of the time.
410: Logically Collective over PETSC_COMM_WORLD
412: Input Parameter:
413: . newThresh - the threshold to use
415: Output Parameter:
416: . oldThresh - the previously set threshold value
418: Options Database Keys:
419: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
421: Usage:
422: .vb
423: PetscInitialize(...);
424: PetscLogNestedBegin();
425: PetscLogSetThreshold(0.1,&oldthresh);
426: ... code ...
427: PetscLogView(viewer);
428: PetscFinalize();
429: .ve
431: Level: advanced
433: .keywords: log, begin
434: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin(),
435: PetscLogNestedBegin()
436: @*/
437: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
438: {
440: if (oldThresh) *oldThresh = thresholdTime;
441: if (newThresh == PETSC_DECIDE) newThresh = 0.01;
442: if (newThresh == PETSC_DEFAULT) newThresh = 0.01;
443: thresholdTime = PetscMax(newThresh, 0.0);
444: return(0);
445: }
447: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
448: {
449: PetscErrorCode ierr;
450: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
451: char version[256], buildoptions[128] = "";
452: PetscMPIInt size;
453: size_t len;
456: MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);
457: PetscGetArchType(arch,sizeof(arch));
458: PetscGetHostName(hostname,sizeof(hostname));
459: PetscGetUserName(username,sizeof(username));
460: PetscGetProgramName(pname,sizeof(pname));
461: PetscGetDate(date,sizeof(date));
462: PetscGetVersion(version,sizeof(version));
464: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
465: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
466: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
467: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
468: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
469: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
470: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
471: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
473: #if defined(PETSC_USE_DEBUG)
474: PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions));
475: #endif
476: #if defined(PETSC_USE_COMPLEX)
477: PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions));
478: #endif
479: #if defined(PETSC_USE_REAL_SINGLE)
480: PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions));
481: #elif defined(PETSC_USE_REAL___FLOAT128)
482: PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions));
483: #elif defined(PETSC_USE_REAL___FP16)
484: PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions));
485: #endif
486: #if defined(PETSC_USE_64BIT_INDICES)
487: PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions));
488: #endif
489: #if defined(__cplusplus)
490: PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions));
491: #endif
492: PetscStrlen(buildoptions,&len);
493: if (len) {
494: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
495: }
496: PetscViewerXMLEndSection(viewer, "runspecification");
497: return(0);
498: }
500: /* Print the global performance: max, max/min, average and total of
501: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
502: */
503: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
504: {
508: PetscViewerXMLStartSection(viewer, name, desc);
509: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
510: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
511: if (avg>-1.0) {
512: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
513: }
514: if (tot>-1.0) {
515: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
516: }
517: PetscViewerXMLEndSection(viewer, name);
518: return(0);
519: }
521: /* Print the global performance: max, max/min, average and total of
522: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
523: */
524: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
525: {
526: PetscErrorCode ierr;
527: PetscLogDouble min, max, tot, ratio, avg;
528: PetscLogDouble flops, mem, red, mess;
529: PetscMPIInt size;
530: MPI_Comm comm;
533: PetscObjectGetComm((PetscObject)viewer,&comm);
534: MPI_Comm_size(comm, &size);
536: /* Must preserve reduction count before we go on */
537: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
539: /* Calculate summary information */
540: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
542: /* Time */
543: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
544: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
545: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
546: avg = (tot)/((PetscLogDouble) size);
547: if (min != 0.0) ratio = max/min;
548: else ratio = 0.0;
549: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
551: /* Objects */
552: avg = (PetscLogDouble) petsc_numObjects;
553: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
554: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
555: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
556: avg = (tot)/((PetscLogDouble) size);
557: if (min != 0.0) ratio = max/min;
558: else ratio = 0.0;
559: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
561: /* Flop */
562: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
563: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
564: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
565: avg = (tot)/((PetscLogDouble) size);
566: if (min != 0.0) ratio = max/min;
567: else ratio = 0.0;
568: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
570: /* Flop/sec -- Must talk to Barry here */
571: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
572: else flops = 0.0;
573: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
574: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
575: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
576: avg = (tot)/((PetscLogDouble) size);
577: if (min != 0.0) ratio = max/min;
578: else ratio = 0.0;
579: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
581: /* Memory */
582: PetscMallocGetMaximumUsage(&mem);
583: if (mem > 0.0) {
584: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
585: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
586: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
587: avg = (tot)/((PetscLogDouble) size);
588: if (min != 0.0) ratio = max/min;
589: else ratio = 0.0;
590: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
591: }
592: /* Messages */
593: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
594: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
595: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
596: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
597: avg = (tot)/((PetscLogDouble) size);
598: if (min != 0.0) ratio = max/min;
599: else ratio = 0.0;
600: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
602: /* Message Volume */
603: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
604: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
605: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
606: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
607: avg = (tot)/((PetscLogDouble) size);
608: if (min != 0.0) ratio = max/min;
609: else ratio = 0.0;
610: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
612: /* Reductions */
613: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
614: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
615: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
616: if (min != 0.0) ratio = max/min;
617: else ratio = 0.0;
618: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
619: PetscViewerXMLEndSection(viewer, "globalperformance");
620: return(0);
621: }
623: typedef struct {
624: PetscLogEvent dftEvent;
625: NestedEventId nstEvent;
626: PetscLogEvent dftParent;
627: NestedEventId nstParent;
628: PetscBool own;
629: int depth;
630: NestedEventId* nstPath;
631: } PetscNestedEventTree;
633: /* Compare timers to sort them in the tree */
634: static int compareTreeItems(const void *item1_, const void *item2_)
635: {
636: int i;
637: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
638: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
639: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
640: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
641: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
642: }
643: if (item1->depth < item2->depth) return -1;
644: if (item1->depth > item2->depth) return 1;
645: return 0;
646: }
647: /*
648: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
649: * calls that happen in some processes, but not in others.
650: *
651: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
652: * The tree is sorted so that the timers can be printed in the order of appearance.
653: *
654: * For tree-items which appear in the trees of multiple processes (which will be most items), the
655: * following rule is followed:
656: * + if information from my own process is available, then that is the information stored in tree.
657: * otherwise it is some other process's information.
658: */
659: static PetscErrorCode PetscLogNestedTreeCreate(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
660: {
661: PetscNestedEventTree *tree = NULL, *newTree;
662: int *treeIndices;
663: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
664: int yesno;
665: PetscBool done;
666: PetscErrorCode ierr;
667: int maxdepth;
668: int depth;
669: int illegalEvent;
670: int iextra;
671: NestedEventId *nstPath, *nstMyPath;
672: MPI_Comm comm;
675: PetscObjectGetComm((PetscObject)viewer,&comm);
677: /* Calculate memory needed to store everybody's information and allocate tree */
678: nTimers = 0;
679: for (i=0; i<nNestedEvents; i++) nTimers += nestedEvents[i].nParents;
681: PetscMalloc1(nTimers,&tree);
683: /* Fill tree with readily available information */
684: iTimer0 = 0;
685: maxDefaultTimer =0;
686: for (i=0; i<nNestedEvents; i++) {
687: int nParents = nestedEvents[i].nParents;
688: NestedEventId nstEvent = nestedEvents[i].nstEvent;
689: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
690: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
691: for (j=0; j<nParents; j++) {
692: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
694: tree[iTimer0+j].dftEvent = dftEvents[j];
695: tree[iTimer0+j].nstEvent = nstEvent;
696: tree[iTimer0+j].dftParent = dftParentsSorted[j];
697: tree[iTimer0+j].own = PETSC_TRUE;
699: tree[iTimer0+j].nstParent = 0;
700: tree[iTimer0+j].depth = 0;
701: tree[iTimer0+j].nstPath = NULL;
702: }
703: iTimer0 += nParents;
704: }
706: /* Calculate the global maximum for the default timer index, so array treeIndices can
707: * be allocated only once */
708: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
709: maxDefaultTimer = j;
711: /* Find default timer's place in the tree */
712: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
713: treeIndices[0] = 0;
714: for (i=0; i<nTimers; i++) {
715: PetscLogEvent dftEvent = tree[i].dftEvent;
716: treeIndices[dftEvent] = i;
717: }
719: /* Find each dftParent's nested identification */
720: for (i=0; i<nTimers; i++) {
721: PetscLogEvent dftParent = tree[i].dftParent;
722: if (dftParent!= DFT_ID_AWAKE) {
723: int j = treeIndices[dftParent];
724: tree[i].nstParent = tree[j].nstEvent;
725: }
726: }
728: /* Find depths for each timer path */
729: done = PETSC_FALSE;
730: maxdepth = 0;
731: while (!done) {
732: done = PETSC_TRUE;
733: for (i=0; i<nTimers; i++) {
734: if (tree[i].dftParent == DFT_ID_AWAKE) {
735: tree[i].depth = 1;
736: maxdepth = PetscMax(1,maxdepth);
737: } else {
738: int j = treeIndices[tree[i].dftParent];
739: depth = 1+tree[j].depth;
740: if (depth>tree[i].depth) {
741: done = PETSC_FALSE;
742: tree[i].depth = depth;
743: maxdepth = PetscMax(depth,maxdepth);
744: }
745: }
746: }
747: }
749: /* Allocate the paths in the entire tree */
750: for (i=0; i<nTimers; i++) {
751: depth = tree[i].depth;
752: PetscCalloc1(depth,&tree[i].nstPath);
753: }
755: /* Calculate the paths for all timers */
756: for (depth=1; depth<=maxdepth; depth++) {
757: for (i=0; i<nTimers; i++) {
758: if (tree[i].depth==depth) {
759: if (depth>1) {
760: int j = treeIndices[tree[i].dftParent];
761: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
762: }
763: tree[i].nstPath[depth-1] = tree[i].nstEvent;
764: }
765: }
766: }
767: PetscFree(treeIndices);
769: /* Sort the tree on basis of the paths */
770: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
772: /* Allocate an array to store paths */
773: depth = maxdepth;
774: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
775: PetscMalloc1(maxdepth+1, &nstPath);
776: PetscMalloc1(maxdepth+1, &nstMyPath);
778: /* Find an illegal nested event index (1+largest nested event index) */
779: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
780: i = illegalEvent;
781: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
783: /* First, detect timers which are not available in this process, but are available in others
784: * Allocate a new tree, that can contain all timers
785: * Then, fill the new tree with all (own and not-own) timers */
786: newTree= NULL;
787: for (yesno=0; yesno<=1; yesno++) {
788: depth = 1;
789: i = 0;
790: iextra = 0;
791: while (depth>0) {
792: int j;
793: PetscBool same;
795: /* Construct the next path in this process's tree:
796: * if necessary, supplement with invalid path entries */
797: depth++;
798: if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
799: if (i<nTimers) {
800: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
801: for (j=tree[i].depth; j<depth; j++) nstMyPath[j] = illegalEvent;
802: } else {
803: for (j=0; j<depth; j++) nstMyPath[j] = illegalEvent;
804: }
806: /* Communicate with other processes to obtain the next path and its depth */
807: MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
808: for (j=depth-1; (int) j>=0; j--) {
809: if (nstPath[j]==illegalEvent) depth=j;
810: }
812: if (depth>0) {
813: /* If the path exists */
815: /* check whether the next path is the same as this process's next path */
816: same = PETSC_TRUE;
817: for (j=0; same && j<depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
819: if (same) {
820: /* Register 'own path' */
821: if (newTree) newTree[i+iextra] = tree[i];
822: i++;
823: } else {
824: /* Register 'not an own path' */
825: if (newTree) {
826: newTree[i+iextra].nstEvent = nstPath[depth-1];
827: newTree[i+iextra].own = PETSC_FALSE;
828: newTree[i+iextra].depth = depth;
829: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
830: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
832: newTree[i+iextra].dftEvent = 0;
833: newTree[i+iextra].dftParent = 0;
834: newTree[i+iextra].nstParent = 0;
835: }
836: iextra++;
837: }
839: }
840: }
842: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
843: totalNTimers = nTimers + iextra;
844: if (!newTree) {
845: PetscMalloc1(totalNTimers, &newTree);
846: }
847: }
848: PetscFree(nstPath);
849: PetscFree(nstMyPath);
850: PetscFree(tree);
851: tree = newTree;
852: newTree = NULL;
854: /* Set return value and return */
855: *p_tree = tree;
856: *p_nTimers = totalNTimers;
857: return(0);
858: }
860: /*
861: * Delete the nested timer tree
862: */
863: static PetscErrorCode PetscLogNestedTreeDestroy(PetscNestedEventTree *tree, int nTimers)
864: {
865: int i;
866: PetscErrorCode ierr;
869: for (i=0; i<nTimers; i++) {
870: PetscFree(tree[i].nstPath);
871: }
872: PetscFree(tree);
873: return(0);
874: }
876: /* Print the global performance: max, max/min, average and total of
877: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
878: */
879: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
880: {
884: PetscViewerXMLStartSection(viewer, name, NULL);
885: if (maxvalue>minvalue*minmaxtreshold) {
886: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
887: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
888: } else {
889: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
890: };
891: PetscViewerXMLEndSection(viewer, name);
892: return(0);
893: }
895: #define N_COMM 8
896: static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
897: {
898: PetscLogDouble time = perfInfo.time;
899: PetscLogDouble timeMx, timeMn;
900: PetscLogDouble countsPerCallMx, countsPerCallMn;
901: PetscLogDouble reductSpeedMx, reductSpeedMn;
902: PetscLogDouble flopSpeedMx, flopSpeedMn;
903: PetscLogDouble msgSpeedMx, msgSpeedMn;
904: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
906: MPI_Comm comm;
909: PetscObjectGetComm((PetscObject)viewer,&comm);
911: commarr_in[0] = time;
912: commarr_in[1] = -time;
913: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
914: timeMx = commarr_out[0];
915: timeMn = -commarr_out[1];
917: commarr_in[0] = time>=timeMx*0.001 ? perfInfo.flops/time : 0;
918: commarr_in[1] = time>=timeMx*0.001 ? perfInfo.numReductions/time : 0;
919: commarr_in[2] = time>=timeMx*0.001 ? perfInfo.messageLength/time : 0;
920: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
922: commarr_in[4] = time>=timeMx*0.001 ? -perfInfo.flops/time : -1e30;
923: commarr_in[5] = time>=timeMx*0.001 ? -perfInfo.numReductions/time : -1e30;
924: commarr_in[6] = time>=timeMx*0.001 ? -perfInfo.messageLength/time : -1e30;
925: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
927: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
929: flopSpeedMx = commarr_out[0];
930: reductSpeedMx = commarr_out[1];
931: msgSpeedMx = commarr_out[2];
932: countsPerCallMx = commarr_out[3];
934: flopSpeedMn = -commarr_out[4];
935: reductSpeedMn = -commarr_out[5];
936: msgSpeedMn = -commarr_out[6];
937: countsPerCallMn = -commarr_out[7];
939: *isPrinted = ((timeMx/totalTime) >= THRESHOLD) ? PETSC_TRUE : PETSC_FALSE;
940: if (*isPrinted) {
941: PetscViewerXMLStartSection(viewer, "event", NULL);
942: PetscViewerXMLPutString(viewer, "name", NULL, name);
943: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
945: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
946: /* One call per parent */
947: } else {
948: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
949: }
951: if (flopSpeedMx<0.01) {
952: /* NO flops: don't print */
953: } else {
954: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
955: }
957: if (msgSpeedMx<0.01) {
958: /* NO msgs: don't print */
959: } else {
960: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
961: }
963: if (reductSpeedMx<0.01) {
964: /* NO reductions: don't print */
965: } else {
966: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
967: }
968: }
969: return(0);
970: }
972: /* Count the number of times the parent event was called */
974: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
975: {
976: if (tree[i].depth<=1) {
977: return 1; /* Main event: only once */
978: } else if (!tree[i].own) {
979: return 1; /* This event didn't happen in this process, but did in another */
980: } else {
981: int iParent;
982: for (iParent=i-1; iParent>=0; iParent--) {
983: if (tree[iParent].depth == tree[i].depth-1) break;
984: }
985: if (tree[iParent].depth != tree[i].depth-1) {
986: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
987: return -2;
988: } else {
989: PetscLogEvent dftEvent = tree[iParent].dftEvent;
990: return eventPerfInfo[dftEvent].count;
991: }
992: }
993: }
995: typedef struct {
996: int id;
997: PetscLogDouble val;
998: } PetscSortItem;
1000: static int compareSortItems(const void *item1_, const void *item2_)
1001: {
1002: PetscSortItem *item1 = (PetscSortItem *) item1_;
1003: PetscSortItem *item2 = (PetscSortItem *) item2_;
1004: if (item1->val > item2->val) return -1;
1005: if (item1->val < item2->val) return +1;
1006: return 0;
1007: }
1009: static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, int iStart, PetscLogDouble totalTime)
1010: {
1011: int depth = tree[iStart].depth;
1012: const char *name;
1013: int parentCount, nChildren;
1014: PetscSortItem *children;
1015: PetscErrorCode ierr;
1016: const int stage = MAINSTAGE;
1017: PetscStageLog stageLog;
1018: PetscEventRegInfo *eventRegInfo;
1019: PetscEventPerfInfo *eventPerfInfo;
1020: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
1021: PetscLogDouble countsPerCall;
1022: PetscBool wasPrinted;
1023: PetscBool childWasPrinted;
1024: MPI_Comm comm;
1026: /* Look up the name of the event and its PerfInfo */
1027: PetscLogGetStageLog(&stageLog);
1028: eventRegInfo = stageLog->eventLog->eventInfo;
1029: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1030: name = eventRegInfo[(PetscLogEvent)tree[iStart].nstEvent].name;
1032: PetscObjectGetComm((PetscObject)viewer,&comm);
1034: /* Count the number of child processes */
1035: nChildren = 0;
1036: {
1037: int i;
1038: for (i=iStart+1; i<nTimers; i++) {
1039: if (tree[i].depth <= depth) break;
1040: if (tree[i].depth == depth + 1) nChildren++;
1041: }
1042: }
1044: if (nChildren>0) {
1045: /* Create an array for the id-s and maxTimes of the children,
1046: * leaving 2 spaces for self-time and other-time */
1047: int i;
1048: PetscLogDouble *times, *maxTimes;
1050: PetscMalloc1(nChildren+2,&children);
1051: nChildren = 0;
1052: for (i=iStart+1; i<nTimers; i++) {
1053: if (tree[i].depth<=depth) break;
1054: if (tree[i].depth == depth + 1) {
1055: children[nChildren].id = i;
1056: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1057: nChildren++;
1058: }
1059: }
1061: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
1062: PetscMalloc1(nChildren,×);
1063: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1065: PetscMalloc1(nChildren,&maxTimes);
1066: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1067: PetscFree(times);
1069: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1070: PetscFree(maxTimes);
1071: }
1073: if (!tree[iStart].own) {
1074: /* Set values for a timer that was not activated in this process
1075: * (but was, in other processes of this run) */
1076: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1078: selfPerfInfo = myPerfInfo;
1079: otherPerfInfo = myPerfInfo;
1081: parentCount = 1;
1082: countsPerCall = 0;
1083: } else {
1084: /* Set the values for a timer that was activated in this process */
1085: int i;
1086: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1088: parentCount = countParents( tree, eventPerfInfo, iStart);
1089: myPerfInfo = eventPerfInfo[dftEvent];
1090: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1092: selfPerfInfo = myPerfInfo;
1093: otherPerfInfo.time = 0;
1094: otherPerfInfo.flops = 0;
1095: otherPerfInfo.numMessages = 0;
1096: otherPerfInfo.messageLength = 0;
1097: otherPerfInfo.numReductions = 0;
1099: for (i=0; i<nChildren; i++) {
1100: /* For all child counters: subtract the child values from self-timers */
1102: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1103: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1105: selfPerfInfo.time -= childPerfInfo.time;
1106: selfPerfInfo.flops -= childPerfInfo.flops;
1107: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1108: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1109: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1111: if ((children[i].val/totalTime) < THRESHOLD) {
1112: /* Add them to 'other' if the time is ignored in the output */
1113: otherPerfInfo.time += childPerfInfo.time;
1114: otherPerfInfo.flops += childPerfInfo.flops;
1115: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1116: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1117: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1118: }
1119: }
1120: }
1122: /* Main output for this timer */
1123: PetscLogNestedTreePrintLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1125: /* Now print the lines for the children */
1126: if (nChildren > 0) {
1127: /* Calculate max-times for 'self' and 'other' */
1128: int i;
1129: PetscLogDouble times[2], maxTimes[2];
1130: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1131: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1132: children[nChildren+0].id = -1;
1133: children[nChildren+0].val = maxTimes[0];
1134: children[nChildren+1].id = -2;
1135: children[nChildren+1].val = maxTimes[1];
1137: /* Now sort the children (including 'self' and 'other') on total time */
1138: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1140: /* Print (or ignore) the children in ascending order of total time */
1141: PetscViewerXMLStartSection(viewer,"events", NULL);
1142: for (i=0; i<nChildren+2; i++) {
1143: if ((children[i].val/totalTime) < THRESHOLD) {
1144: /* ignored: no output */
1145: } else if (children[i].id==-1) {
1146: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1147: if (childWasPrinted) {
1148: PetscViewerXMLEndSection(viewer,"event");
1149: }
1150: } else if (children[i].id==-2) {
1151: size_t len;
1152: char *otherName;
1154: PetscStrlen(name,&len);
1155: PetscMalloc1(len+16,&otherName);
1156: PetscSNPrintf(otherName,len+16,"%s: other-timed",name);
1157: PetscLogNestedTreePrintLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1158: PetscFree(otherName);
1159: if (childWasPrinted) {
1160: PetscViewerXMLEndSection(viewer,"event");
1161: }
1162: } else {
1163: /* Print the child with a recursive call to this function */
1164: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1165: }
1166: }
1167: PetscViewerXMLEndSection(viewer,"events");
1168: PetscFree(children);
1169: }
1171: if (wasPrinted) {
1172: PetscViewerXMLEndSection(viewer, "event");
1173: }
1174: return 0;
1175: }
1177: static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, PetscLogDouble totalTime)
1178: {
1179: int i, nChildren;
1180: PetscSortItem *children;
1181: PetscErrorCode ierr;
1182: const int stage = MAINSTAGE;
1183: PetscStageLog stageLog;
1184: PetscEventPerfInfo *eventPerfInfo;
1185: MPI_Comm comm;
1188: PetscObjectGetComm((PetscObject)viewer,&comm);
1190: /* Look up the PerfInfo */
1191: PetscLogGetStageLog(&stageLog);
1192: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1194: /* Count the number of child processes, and count total time */
1195: nChildren = 0;
1196: for (i=0; i<nTimers; i++)
1197: if (tree[i].depth==1) nChildren++;
1199: if (nChildren>0) {
1200: /* Create an array for the id-s and maxTimes of the children,
1201: * leaving 2 spaces for self-time and other-time */
1202: PetscLogDouble *times, *maxTimes;
1204: PetscMalloc1(nChildren,&children);
1205: nChildren = 0;
1206: for (i=0; i<nTimers; i++) {
1207: if (tree[i].depth == 1) {
1208: children[nChildren].id = i;
1209: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1210: nChildren++;
1211: }
1212: }
1214: /* Calculate the children's maximum times, to sort them */
1215: PetscMalloc1(nChildren,×);
1216: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1218: PetscMalloc1(nChildren,&maxTimes);
1219: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1220: PetscFree(times);
1222: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1223: PetscFree(maxTimes);
1225: /* Now sort the children on total time */
1226: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1227: /* Print (or ignore) the children in ascending order of total time */
1228: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1229: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1230: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, thresholdTime, "%f");
1232: for (i=0; i<nChildren; i++) {
1233: if ((children[i].val/totalTime) < THRESHOLD) {
1234: /* ignored: no output */
1235: } else {
1236: /* Print the child with a recursive call to this function */
1237: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1238: }
1239: }
1240: PetscViewerXMLEndSection(viewer, "timertree");
1241: PetscFree(children);
1242: }
1243: return(0);
1244: }
1246: typedef struct {
1247: char *name;
1248: PetscLogDouble time;
1249: PetscLogDouble flops;
1250: PetscLogDouble numMessages;
1251: PetscLogDouble messageLength;
1252: PetscLogDouble numReductions;
1253: } PetscSelfTimer;
1255: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1256: {
1257: PetscErrorCode ierr;
1258: const int stage = MAINSTAGE;
1259: PetscStageLog stageLog;
1260: PetscEventRegInfo *eventRegInfo;
1261: PetscEventPerfInfo *eventPerfInfo;
1262: PetscSelfTimer *selftimes;
1263: PetscSelfTimer *totaltimes;
1264: NestedEventId *nstEvents;
1265: int i, j, maxDefaultTimer;
1266: NestedEventId nst;
1267: PetscLogEvent dft;
1268: int nstMax, nstMax_local;
1269: MPI_Comm comm;
1272: PetscObjectGetComm((PetscObject)viewer,&comm);
1273: PetscLogGetStageLog(&stageLog);
1274: eventRegInfo = stageLog->eventLog->eventInfo;
1275: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1277: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1278: maxDefaultTimer =0;
1279: for (i=0; i<nNestedEvents; i++) {
1280: int nParents = nestedEvents[i].nParents;
1281: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1282: for (j=0; j<nParents; j++) maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1283: }
1284: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1285: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1286: for (i=0; i<nNestedEvents; i++) {
1287: int nParents = nestedEvents[i].nParents;
1288: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1289: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1290: for (j=0; j<nParents; j++) nstEvents[dftEvents[j]] = nstEvent;
1291: }
1293: /* Calculate largest nested event-ID */
1294: nstMax_local = 0;
1295: for (i=0; i<nNestedEvents; i++) nstMax_local = PetscMax(nestedEvents[i].nstEvent,nstMax_local);
1296: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1299: /* Initialize all total-times with zero */
1300: PetscMalloc1(nstMax+1,&selftimes);
1301: PetscMalloc1(nstMax+1,&totaltimes);
1302: for (nst=0; nst<=nstMax; nst++) {
1303: totaltimes[nst].time = 0;
1304: totaltimes[nst].flops = 0;
1305: totaltimes[nst].numMessages = 0;
1306: totaltimes[nst].messageLength = 0;
1307: totaltimes[nst].numReductions = 0;
1308: totaltimes[nst].name = NULL;
1309: }
1311: /* Calculate total-times */
1312: for (i=0; i<nNestedEvents; i++) {
1313: const int nParents = nestedEvents[i].nParents;
1314: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1315: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1316: for (j=0; j<nParents; j++) {
1317: const PetscLogEvent dftEvent = dftEvents[j];
1318: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1319: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1320: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1321: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1322: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1323: }
1324: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1325: }
1327: /* Initialize: self-times := totaltimes */
1328: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1330: /* Subtract timed subprocesses from self-times */
1331: for (i=0; i<nNestedEvents; i++) {
1332: const int nParents = nestedEvents[i].nParents;
1333: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1334: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1335: for (j=0; j<nParents; j++) {
1336: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1337: const PetscLogEvent dftEvent = dftEvents[j];
1338: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1339: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1340: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1341: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1342: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1343: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1344: }
1345: }
1346: }
1348: PetscFree(nstEvents);
1349: PetscFree(totaltimes);
1351: /* Set outputs */
1352: *p_self = selftimes;
1353: *p_nstMax = nstMax;
1354: return(0);
1355: }
1357: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1358: {
1359: PetscErrorCode ierr;
1360: int i;
1361: NestedEventId nst;
1362: PetscSortItem *sortSelfTimes;
1363: PetscLogDouble *times, *maxTimes;
1364: PetscStageLog stageLog;
1365: PetscEventRegInfo *eventRegInfo;
1366: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1367: PetscBool wasPrinted;
1368: MPI_Comm comm;
1371: PetscObjectGetComm((PetscObject)viewer,&comm);
1372: PetscLogGetStageLog(&stageLog);
1373: eventRegInfo = stageLog->eventLog->eventInfo;
1375: PetscMalloc1(nstMax+1,×);
1376: PetscMalloc1(nstMax+1,&maxTimes);
1377: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1378: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1379: PetscFree(times);
1381: PetscMalloc1(nstMax+1,&sortSelfTimes);
1383: /* Sort the self-timers on basis of the largest time needed */
1384: for (nst=0; nst<=nstMax; nst++) {
1385: sortSelfTimes[nst].id = nst;
1386: sortSelfTimes[nst].val = maxTimes[nst];
1387: }
1388: PetscFree(maxTimes);
1389: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1391: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1392: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1394: for (i=0; i<=nstMax; i++) {
1395: if ((sortSelfTimes[i].val/totalTime) >= THRESHOLD) {
1396: NestedEventId nstEvent = sortSelfTimes[i].id;
1397: const char *name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1398: PetscEventPerfInfo selfPerfInfo;
1400: selfPerfInfo.time = selftimes[nstEvent].time ;
1401: selfPerfInfo.flops = selftimes[nstEvent].flops;
1402: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1403: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1404: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1406: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1407: if (wasPrinted){
1408: PetscViewerXMLEndSection(viewer, "event");
1409: }
1410: }
1411: }
1412: PetscViewerXMLEndSection(viewer, "selftimertable");
1413: PetscFree(sortSelfTimes);
1414: return(0);
1415: }
1417: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1418: {
1419: PetscErrorCode ierr;
1420: PetscLogDouble locTotalTime, globTotalTime;
1421: PetscNestedEventTree *tree = NULL;
1422: PetscSelfTimer *selftimers = NULL;
1423: int nTimers = 0, nstMax = 0;
1424: MPI_Comm comm;
1427: PetscObjectGetComm((PetscObject)viewer,&comm);
1428: PetscViewerInitASCII_XML(viewer);
1429: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1430: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1432: /* Get the total elapsed time, local and global maximum */
1433: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1434: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1436: /* Print global information about this run */
1437: PetscPrintExeSpecs(viewer);
1438: PetscPrintGlobalPerformance(viewer, locTotalTime);
1440: /* Collect nested timer tree info from all processes */
1441: PetscLogNestedTreeCreate(viewer, &tree, &nTimers);
1442: PetscLogNestedTreePrintTop(viewer, tree, nTimers, globTotalTime);
1443: PetscLogNestedTreeDestroy(tree, nTimers);
1445: /* Calculate self-time for all (not-nested) events */
1446: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1447: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1448: PetscFree(selftimers);
1450: PetscViewerXMLEndSection(viewer, "petscroot");
1451: PetscViewerFinalASCII_XML(viewer);
1452: return(0);
1453: }
1455: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1456: {
1459: #endif
1462: petsc_send_ct++;
1464: PetscMPITypeSize(&petsc_send_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1465: #endif
1466: return(0);
1467: }
1469: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1470: {
1473: #endif
1476: petsc_recv_ct++;
1478: PetscMPITypeSize(&petsc_recv_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1479: #endif
1480: return(0);
1481: }
1483: PETSC_EXTERN PetscErrorCode PetscAReduce()
1484: {
1486: petsc_allreduce_ct++;
1487: return(0);
1488: }
1490: #endif