Actual source code: xmllogevent.c
petsc-3.9.4 2018-09-11
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmllogevent.h"
13: #include "../src/sys/logging/xmlviewer.h"
15: #if defined(PETSC_USE_LOG)
17: /*
18: * Support for nested PetscTimers
19: *
20: * PetscTimers keep track of a lot of useful information: Wall clock times,
21: * message passing statistics, flop counts. Information about the nested structure
22: * of the timers is lost. Example:
23: *
24: * 7:30 Start: awake
25: * 7:30 Start: morning routine
26: * 7:40 Start: eat
27: * 7:49 Done: eat
28: * 7:43 Done: morning routine
29: * 8:15 Start: work
30: * 12:15 Start: eat
31: * 12:45 Done: eat
32: * 16:00 Done: work
33: * 16:30 Start: evening routine
34: * 18:30 Start: eat
35: * 19:15 Done: eat
36: * 22:00 Done: evening routine
37: * 22:00 Done: awake
38: *
39: * Petsc timers provide the following timer results:
40: *
41: * awake: 1 call 14:30 hours
42: * morning routine: 1 call 0:13 hours
43: * eat: 3 calls 1:24 hours
44: * work: 1 call 7:45 hours
45: * evening routine 1 call 5:30 hours
46: *
47: * Nested timers can be used to get the following table:
48: *
49: * [1 call]: awake 14:30 hours
50: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
51: * [1 call]: eat 0:09 hours (69 % of morning routine)
52: * rest (morning routine) 0:04 hours (31 % of morning routine)
53: * [1 call]: work 7:45 hours (53 % of awake)
54: * [1 call]: eat 0:30 hours ( 6 % of work)
55: * rest (work) 7:15 hours (94 % of work)
56: * [1 call]: evening routine 5:30 hours (38 % of awake)
57: * [1 call]: eat 0:45 hours (14 % of evening routine)
58: * rest (evening routine) 4:45 hours (86 % of morning routine)
59: *
60: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
61: * the nested timers make the stages unnecessary.
62: *
63: */
65: /*
66: * Data structures for keeping track of nested timers:
67: *
68: * nestedEvents: information about the timers that have actually been activated
69: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
70: *
71: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
72: * (one or more) default timers, where one of the default timers has the same event-id as the
73: * nested one.
74: *
75: * Because of the risk of confusion between nested timer ids and default timer ids, we
76: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
77: * only for default events. Also, all nested event variables are prepended with 'nst', and
78: * default timers with 'dft'.
79: */
81: #define DFT_ID_AWAKE -1
83: typedef PetscLogEvent NestedEventId;
84: typedef struct {
85: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
86: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
87: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
88: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
90: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
91: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
92: } PetscNestedEvent;
94: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
95: static int nNestedEvents = 0;
96: static int nNestedEventsAllocated = 0;
97: static PetscNestedEvent *nestedEvents = NULL;
98: static PetscLogDouble thresholdTime = 0.01; /* initial value was 0.1 */
100: #define THRESHOLD (thresholdTime/100.0+1e-12)
102: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
104: PETSC_INTERN PetscErrorCode PetscLogView_Nested(PetscViewer);
107: /*@C
108: PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
109: rates and object creation and should not slow programs down too much.
111: Logically Collective over PETSC_COMM_WORLD
113: Options Database Keys:
114: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
116: Usage:
117: .vb
118: PetscInitialize(...);
119: PetscLogNestedBegin();
120: ... code ...
121: PetscLogView(viewer);
122: PetscFinalize();
123: .ve
125: Level: advanced
127: .keywords: log, begin
128: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin()
129: @*/
130: PetscErrorCode PetscLogNestedBegin(void)
131: {
134: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
136: nNestedEventsAllocated = 10;
137: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
140: dftParentActive = DFT_ID_AWAKE;
141: nNestedEvents =1;
143: /* 'Awake' is nested event 0. It has no parents */
144: nestedEvents[0].nstEvent = 0;
145: nestedEvents[0].nParents = 0;
146: nestedEvents[0].dftParentsSorted = NULL;
147: nestedEvents[0].dftEvents = NULL;
148: nestedEvents[0].dftParents = NULL;
149: nestedEvents[0].dftEventsSorted = NULL;
151: PetscLogSet(PetscLogEventBeginNested,PetscLogEventEndNested);
152: return(0);
153: }
155: /* Delete the data structures for the nested timers */
156: PetscErrorCode PetscLogNestedEnd(void)
157: {
159: int i;
161: if (!nestedEvents) return(0);
162: for (i=0; i<nNestedEvents; i++) {
163: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
164: }
165: PetscFree(nestedEvents);
166: nestedEvents = NULL;
167: nNestedEvents = 0;
168: nNestedEventsAllocated = 0;
169: return(0);
170: }
173: /*
174: * UTILITIES: FIND STUFF IN SORTED ARRAYS
175: *
176: * Utility: find a default timer in a sorted array */
177: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
178: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
179: int narray, /* dimension of dftArray */
180: int *entry) /* entry in the array where dftIndex may be found;
181: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
182: * In that case, the dftIndex can be inserted at this entry. */
183: {
185: if (narray==0 || dftIndex <= dftArray[0]) {
186: *entry = 0;
187: } else if (dftIndex > dftArray[narray-1]) {
188: *entry = narray;
189: } else {
190: int ihigh=narray-1, ilow=0;
191: while (ihigh>ilow) {
192: const int imiddle = (ihigh+ilow)/2;
193: if (dftArray[imiddle] > dftIndex) {
194: ihigh=imiddle;
195: } else if (dftArray[imiddle]<dftIndex) {
196: ilow =imiddle+1;
197: } else {
198: ihigh=imiddle;
199: ilow =imiddle;
200: }
201: }
202: *entry = ihigh;
203: }
204: return(0);
205: }
207: /* Utility: find the nested event with given identification */
208: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
209: int *entry) /* entry in the nestedEvents where nstEvent may be found;
210: * if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray */
211: {
214: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
215: *entry = 0;
216: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
217: *entry = nNestedEvents;
218: } else {
219: int ihigh=nNestedEvents-1, ilow=0;
220: while (ihigh>ilow) {
221: const int imiddle = (ihigh+ilow)/2;
222: if (nestedEvents[imiddle].nstEvent > nstEvent) {
223: ihigh=imiddle;
224: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
225: ilow =imiddle+1;
226: } else {
227: ihigh=imiddle;
228: ilow =imiddle;
229: }
230: }
231: *entry = ihigh;
232: }
233: return(0);
234: }
236: /*
237: Nested logging is not prepared yet to support user-defined logging stages, so for now we force logging on the main stage.
238: Using PetscLogStage{Push/Pop}() would be more appropriate, but these two calls do extra bookkeeping work we don't need.
239: */
241: #define MAINSTAGE 0
243: static PetscLogStage savedStage = 0;
245: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageOverride(void)
246: {
247: PetscStageLog stageLog = petsc_stageLog;
251: if (stageLog->curStage == MAINSTAGE) return(0);
252: savedStage = stageLog->curStage;
253: stageLog->curStage = MAINSTAGE;
254: PetscIntStackPush(stageLog->stack, MAINSTAGE);
255: return(0);
256: }
258: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageRestore(void)
259: {
260: PetscStageLog stageLog = petsc_stageLog;
264: if (savedStage == MAINSTAGE) return(0);
265: stageLog->curStage = savedStage;
266: PetscIntStackPop(stageLog->stack, &savedStage);
267: return(0);
268: }
270: /******************************************************************************************/
271: /* Start a nested event */
272: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
273: {
274: PetscErrorCode ierr;
275: int entry, pentry, tentry,i;
276: PetscLogEvent dftEvent;
279: PetscLogEventFindNestedTimer(nstEvent, &entry);
280: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
281: /* Nested event doesn't exist yet: create it */
283: if (nNestedEvents==nNestedEventsAllocated) {
284: /* Enlarge and re-allocate nestedEvents if needed */
285: PetscNestedEvent *tmp = nestedEvents;
286: PetscMalloc1(2*nNestedEvents,&nestedEvents);
287: nNestedEventsAllocated*=2;
288: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
289: PetscFree(tmp);
290: }
292: /* Clear space in nestedEvents for new nested event */
293: nNestedEvents++;
294: for (i = nNestedEvents-1; i>entry; i--) {
295: nestedEvents[i] = nestedEvents[i-1];
296: }
298: /* Create event in nestedEvents */
299: nestedEvents[entry].nstEvent = nstEvent;
300: nestedEvents[entry].nParents=1;
301: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
303: /* Fill in new event */
304: pentry = 0;
305: dftEvent = (PetscLogEvent) nstEvent;
307: nestedEvents[entry].nstEvent = nstEvent;
308: nestedEvents[entry].dftParents[pentry] = dftParentActive;
309: nestedEvents[entry].dftEvents[pentry] = dftEvent;
310: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
311: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
313: } else {
314: /* Nested event exists: find current dftParentActive among parents */
315: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
316: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
317: int nParents = nestedEvents[entry].nParents;
319: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
321: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
322: /* dftParentActive not in the list: add it to the list */
323: int i;
324: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
325: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
326: char name[100];
328: /* Register a new default timer */
329: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
330: PetscLogEventRegister(name, 0, &dftEvent);
331: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
333: /* Reallocate parents and dftEvents to make space for new parent */
334: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
335: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
336: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
337: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
338: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
339: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
341: dftParents = nestedEvents[entry].dftParents;
342: dftEvents = nestedEvents[entry].dftEvents;
343: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
344: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
346: nestedEvents[entry].nParents++;
347: nParents++;
349: for (i = nParents-1; i>pentry; i--) {
350: dftParentsSorted[i] = dftParentsSorted[i-1];
351: dftEvents[i] = dftEvents[i-1];
352: }
353: for (i = nParents-1; i>tentry; i--) {
354: dftParents[i] = dftParents[i-1];
355: dftEventsSorted[i] = dftEventsSorted[i-1];
356: }
358: /* Fill in the new default timer */
359: dftParentsSorted[pentry] = dftParentActive;
360: dftEvents[pentry] = dftEvent;
361: dftParents[tentry] = dftParentActive;
362: dftEventsSorted[tentry] = dftEvent;
364: } else {
365: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
366: dftEvent = nestedEvents[entry].dftEvents[pentry];
367: }
368: }
370: /* Start the default 'dftEvent'-timer and update the dftParentActive */
371: PetscLogStageOverride();
372: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
373: PetscLogStageRestore();
374: dftParentActive = dftEvent;
375: return(0);
376: }
378: /* End a nested event */
379: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
380: {
381: PetscErrorCode ierr;
382: int entry, pentry, nParents;
383: PetscLogEvent *dftEventsSorted;
386: /* Find the nested event */
387: PetscLogEventFindNestedTimer(nstEvent, &entry);
388: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
389: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
390: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
391: nParents = nestedEvents[entry].nParents;
393: /* Find the current default timer among the 'dftEvents' of this event */
394: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
396: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
397: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
399: /* Stop the default timer and update the dftParentActive */
400: PetscLogStageOverride();
401: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
402: PetscLogStageRestore();
403: dftParentActive = nestedEvents[entry].dftParents[pentry];
404: return(0);
405: }
407: /*@
408: PetscLogSetThreshold - Set the threshold time for logging the events; this is a percentage out of 100, so 1. means any event
409: that takes 1 or more percent of the time.
411: Logically Collective over PETSC_COMM_WORLD
413: Input Parameter:
414: . newThresh - the threshold to use
416: Output Parameter:
417: . oldThresh - the previously set threshold value
419: Options Database Keys:
420: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
422: Usage:
423: .vb
424: PetscInitialize(...);
425: PetscLogNestedBegin();
426: PetscLogSetThreshold(0.1,&oldthresh);
427: ... code ...
428: PetscLogView(viewer);
429: PetscFinalize();
430: .ve
432: Level: advanced
434: .keywords: log, begin
435: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin(),
436: PetscLogNestedBegin()
437: @*/
438: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
439: {
441: if (oldThresh) *oldThresh = thresholdTime;
442: if (newThresh == PETSC_DECIDE) newThresh = 0.01;
443: if (newThresh == PETSC_DEFAULT) newThresh = 0.01;
444: thresholdTime = PetscMax(newThresh, 0.0);
445: return(0);
446: }
448: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
449: {
450: PetscErrorCode ierr;
451: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
452: char version[256], buildoptions[128] = "";
453: PetscMPIInt size;
454: size_t len;
457: MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);
458: PetscGetArchType(arch,sizeof(arch));
459: PetscGetHostName(hostname,sizeof(hostname));
460: PetscGetUserName(username,sizeof(username));
461: PetscGetProgramName(pname,sizeof(pname));
462: PetscGetDate(date,sizeof(date));
463: PetscGetVersion(version,sizeof(version));
465: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
466: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
467: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
468: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
469: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
470: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
471: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
472: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
474: #if defined(PETSC_USE_DEBUG)
475: PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions));
476: #endif
477: #if defined(PETSC_USE_COMPLEX)
478: PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions));
479: #endif
480: #if defined(PETSC_USE_REAL_SINGLE)
481: PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions));
482: #elif defined(PETSC_USE_REAL___FLOAT128)
483: PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions));
484: #elif defined(PETSC_USE_REAL___FP16)
485: PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions));
486: #endif
487: #if defined(PETSC_USE_64BIT_INDICES)
488: PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions));
489: #endif
490: #if defined(__cplusplus)
491: PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions));
492: #endif
493: PetscStrlen(buildoptions,&len);
494: if (len) {
495: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
496: }
497: PetscViewerXMLEndSection(viewer, "runspecification");
498: return(0);
499: }
501: /* Print the global performance: max, max/min, average and total of
502: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
503: */
504: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
505: {
509: PetscViewerXMLStartSection(viewer, name, desc);
510: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
511: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
512: if (avg>-1.0) {
513: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
514: }
515: if (tot>-1.0) {
516: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
517: }
518: PetscViewerXMLEndSection(viewer, name);
519: return(0);
520: }
522: /* Print the global performance: max, max/min, average and total of
523: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
524: */
525: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
526: {
527: PetscErrorCode ierr;
528: PetscLogDouble min, max, tot, ratio, avg;
529: PetscLogDouble flops, mem, red, mess;
530: PetscMPIInt size;
531: MPI_Comm comm;
534: PetscObjectGetComm((PetscObject)viewer,&comm);
535: MPI_Comm_size(comm, &size);
537: /* Must preserve reduction count before we go on */
538: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
540: /* Calculate summary information */
541: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
543: /* Time */
544: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
545: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
546: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
547: avg = (tot)/((PetscLogDouble) size);
548: if (min != 0.0) ratio = max/min;
549: else ratio = 0.0;
550: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
552: /* Objects */
553: avg = (PetscLogDouble) petsc_numObjects;
554: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
555: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
556: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
557: avg = (tot)/((PetscLogDouble) size);
558: if (min != 0.0) ratio = max/min;
559: else ratio = 0.0;
560: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
562: /* Flop */
563: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
564: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
565: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
566: avg = (tot)/((PetscLogDouble) size);
567: if (min != 0.0) ratio = max/min;
568: else ratio = 0.0;
569: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
571: /* Flop/sec -- Must talk to Barry here */
572: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
573: else flops = 0.0;
574: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
575: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
576: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
577: avg = (tot)/((PetscLogDouble) size);
578: if (min != 0.0) ratio = max/min;
579: else ratio = 0.0;
580: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
582: /* Memory */
583: PetscMallocGetMaximumUsage(&mem);
584: if (mem > 0.0) {
585: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
586: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
587: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
588: avg = (tot)/((PetscLogDouble) size);
589: if (min != 0.0) ratio = max/min;
590: else ratio = 0.0;
591: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
592: }
593: /* Messages */
594: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
595: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
596: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
597: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
598: avg = (tot)/((PetscLogDouble) size);
599: if (min != 0.0) ratio = max/min;
600: else ratio = 0.0;
601: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
603: /* Message Volume */
604: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
605: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
606: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
607: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
608: avg = (tot)/((PetscLogDouble) size);
609: if (min != 0.0) ratio = max/min;
610: else ratio = 0.0;
611: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
613: /* Reductions */
614: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
615: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
616: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
617: if (min != 0.0) ratio = max/min;
618: else ratio = 0.0;
619: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
620: PetscViewerXMLEndSection(viewer, "globalperformance");
621: return(0);
622: }
624: typedef struct {
625: PetscLogEvent dftEvent;
626: NestedEventId nstEvent;
627: PetscLogEvent dftParent;
628: NestedEventId nstParent;
629: PetscBool own;
630: int depth;
631: NestedEventId* nstPath;
632: } PetscNestedEventTree;
634: /* Compare timers to sort them in the tree */
635: static int compareTreeItems(const void *item1_, const void *item2_)
636: {
637: int i;
638: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
639: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
640: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
641: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
642: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
643: }
644: if (item1->depth < item2->depth) return -1;
645: if (item1->depth > item2->depth) return 1;
646: return 0;
647: }
648: /*
649: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
650: * calls that happen in some processes, but not in others.
651: *
652: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
653: * The tree is sorted so that the timers can be printed in the order of appearance.
654: *
655: * For tree-items which appear in the trees of multiple processes (which will be most items), the
656: * following rule is followed:
657: * + if information from my own process is available, then that is the information stored in tree.
658: * otherwise it is some other process's information.
659: */
660: static PetscErrorCode PetscLogNestedTreeCreate(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
661: {
662: PetscNestedEventTree *tree = NULL, *newTree;
663: int *treeIndices;
664: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
665: int yesno;
666: PetscBool done;
667: PetscErrorCode ierr;
668: int maxdepth;
669: int depth;
670: int illegalEvent;
671: int iextra;
672: NestedEventId *nstPath, *nstMyPath;
673: MPI_Comm comm;
676: PetscObjectGetComm((PetscObject)viewer,&comm);
678: /* Calculate memory needed to store everybody's information and allocate tree */
679: nTimers = 0;
680: for (i=0; i<nNestedEvents; i++) nTimers += nestedEvents[i].nParents;
682: PetscMalloc1(nTimers,&tree);
684: /* Fill tree with readily available information */
685: iTimer0 = 0;
686: maxDefaultTimer =0;
687: for (i=0; i<nNestedEvents; i++) {
688: int nParents = nestedEvents[i].nParents;
689: NestedEventId nstEvent = nestedEvents[i].nstEvent;
690: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
691: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
692: for (j=0; j<nParents; j++) {
693: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
695: tree[iTimer0+j].dftEvent = dftEvents[j];
696: tree[iTimer0+j].nstEvent = nstEvent;
697: tree[iTimer0+j].dftParent = dftParentsSorted[j];
698: tree[iTimer0+j].own = PETSC_TRUE;
700: tree[iTimer0+j].nstParent = 0;
701: tree[iTimer0+j].depth = 0;
702: tree[iTimer0+j].nstPath = NULL;
703: }
704: iTimer0 += nParents;
705: }
707: /* Calculate the global maximum for the default timer index, so array treeIndices can
708: * be allocated only once */
709: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
710: maxDefaultTimer = j;
712: /* Find default timer's place in the tree */
713: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
714: treeIndices[0] = 0;
715: for (i=0; i<nTimers; i++) {
716: PetscLogEvent dftEvent = tree[i].dftEvent;
717: treeIndices[dftEvent] = i;
718: }
720: /* Find each dftParent's nested identification */
721: for (i=0; i<nTimers; i++) {
722: PetscLogEvent dftParent = tree[i].dftParent;
723: if (dftParent!= DFT_ID_AWAKE) {
724: int j = treeIndices[dftParent];
725: tree[i].nstParent = tree[j].nstEvent;
726: }
727: }
729: /* Find depths for each timer path */
730: done = PETSC_FALSE;
731: maxdepth = 0;
732: while (!done) {
733: done = PETSC_TRUE;
734: for (i=0; i<nTimers; i++) {
735: if (tree[i].dftParent == DFT_ID_AWAKE) {
736: tree[i].depth = 1;
737: maxdepth = PetscMax(1,maxdepth);
738: } else {
739: int j = treeIndices[tree[i].dftParent];
740: depth = 1+tree[j].depth;
741: if (depth>tree[i].depth) {
742: done = PETSC_FALSE;
743: tree[i].depth = depth;
744: maxdepth = PetscMax(depth,maxdepth);
745: }
746: }
747: }
748: }
750: /* Allocate the paths in the entire tree */
751: for (i=0; i<nTimers; i++) {
752: depth = tree[i].depth;
753: PetscCalloc1(depth,&tree[i].nstPath);
754: }
756: /* Calculate the paths for all timers */
757: for (depth=1; depth<=maxdepth; depth++) {
758: for (i=0; i<nTimers; i++) {
759: if (tree[i].depth==depth) {
760: if (depth>1) {
761: int j = treeIndices[tree[i].dftParent];
762: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
763: }
764: tree[i].nstPath[depth-1] = tree[i].nstEvent;
765: }
766: }
767: }
768: PetscFree(treeIndices);
770: /* Sort the tree on basis of the paths */
771: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
773: /* Allocate an array to store paths */
774: depth = maxdepth;
775: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
776: PetscMalloc1(maxdepth+1, &nstPath);
777: PetscMalloc1(maxdepth+1, &nstMyPath);
779: /* Find an illegal nested event index (1+largest nested event index) */
780: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
781: i = illegalEvent;
782: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
784: /* First, detect timers which are not available in this process, but are available in others
785: * Allocate a new tree, that can contain all timers
786: * Then, fill the new tree with all (own and not-own) timers */
787: newTree= NULL;
788: for (yesno=0; yesno<=1; yesno++) {
789: depth = 1;
790: i = 0;
791: iextra = 0;
792: while (depth>0) {
793: int j;
794: PetscBool same;
796: /* Construct the next path in this process's tree:
797: * if necessary, supplement with invalid path entries */
798: depth++;
799: if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
800: if (i<nTimers) {
801: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
802: for (j=tree[i].depth; j<depth; j++) nstMyPath[j] = illegalEvent;
803: } else {
804: for (j=0; j<depth; j++) nstMyPath[j] = illegalEvent;
805: }
807: /* Communicate with other processes to obtain the next path and its depth */
808: MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
809: for (j=depth-1; (int) j>=0; j--) {
810: if (nstPath[j]==illegalEvent) depth=j;
811: }
813: if (depth>0) {
814: /* If the path exists */
816: /* check whether the next path is the same as this process's next path */
817: same = PETSC_TRUE;
818: for (j=0; same && j<depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
820: if (same) {
821: /* Register 'own path' */
822: if (newTree) newTree[i+iextra] = tree[i];
823: i++;
824: } else {
825: /* Register 'not an own path' */
826: if (newTree) {
827: newTree[i+iextra].nstEvent = nstPath[depth-1];
828: newTree[i+iextra].own = PETSC_FALSE;
829: newTree[i+iextra].depth = depth;
830: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
831: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
833: newTree[i+iextra].dftEvent = 0;
834: newTree[i+iextra].dftParent = 0;
835: newTree[i+iextra].nstParent = 0;
836: }
837: iextra++;
838: }
840: }
841: }
843: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
844: totalNTimers = nTimers + iextra;
845: if (!newTree) {
846: PetscMalloc1(totalNTimers, &newTree);
847: }
848: }
849: PetscFree(nstPath);
850: PetscFree(nstMyPath);
851: PetscFree(tree);
852: tree = newTree;
853: newTree = NULL;
855: /* Set return value and return */
856: *p_tree = tree;
857: *p_nTimers = totalNTimers;
858: return(0);
859: }
861: /*
862: * Delete the nested timer tree
863: */
864: static PetscErrorCode PetscLogNestedTreeDestroy(PetscNestedEventTree *tree, int nTimers)
865: {
866: int i;
867: PetscErrorCode ierr;
870: for (i=0; i<nTimers; i++) {
871: PetscFree(tree[i].nstPath);
872: }
873: PetscFree(tree);
874: return(0);
875: }
877: /* Print the global performance: max, max/min, average and total of
878: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
879: */
880: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
881: {
885: PetscViewerXMLStartSection(viewer, name, NULL);
886: if (maxvalue>minvalue*minmaxtreshold) {
887: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
888: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
889: } else {
890: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
891: };
892: PetscViewerXMLEndSection(viewer, name);
893: return(0);
894: }
896: #define N_COMM 8
897: static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
898: {
899: PetscLogDouble time = perfInfo.time;
900: PetscLogDouble timeMx, timeMn;
901: PetscLogDouble countsPerCallMx, countsPerCallMn;
902: PetscLogDouble reductSpeedMx, reductSpeedMn;
903: PetscLogDouble flopSpeedMx, flopSpeedMn;
904: PetscLogDouble msgSpeedMx, msgSpeedMn;
905: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
907: MPI_Comm comm;
910: PetscObjectGetComm((PetscObject)viewer,&comm);
912: commarr_in[0] = time;
913: commarr_in[1] = -time;
914: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
915: timeMx = commarr_out[0];
916: timeMn = -commarr_out[1];
918: commarr_in[0] = time>=timeMx*0.001 ? perfInfo.flops/time : 0;
919: commarr_in[1] = time>=timeMx*0.001 ? perfInfo.numReductions/time : 0;
920: commarr_in[2] = time>=timeMx*0.001 ? perfInfo.messageLength/time : 0;
921: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
923: commarr_in[4] = time>=timeMx*0.001 ? -perfInfo.flops/time : -1e30;
924: commarr_in[5] = time>=timeMx*0.001 ? -perfInfo.numReductions/time : -1e30;
925: commarr_in[6] = time>=timeMx*0.001 ? -perfInfo.messageLength/time : -1e30;
926: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
928: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
930: flopSpeedMx = commarr_out[0];
931: reductSpeedMx = commarr_out[1];
932: msgSpeedMx = commarr_out[2];
933: countsPerCallMx = commarr_out[3];
935: flopSpeedMn = -commarr_out[4];
936: reductSpeedMn = -commarr_out[5];
937: msgSpeedMn = -commarr_out[6];
938: countsPerCallMn = -commarr_out[7];
940: *isPrinted = ((timeMx/totalTime) >= THRESHOLD) ? PETSC_TRUE : PETSC_FALSE;
941: if (*isPrinted) {
942: PetscViewerXMLStartSection(viewer, "event", NULL);
943: PetscViewerXMLPutString(viewer, "name", NULL, name);
944: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
946: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
947: /* One call per parent */
948: } else {
949: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
950: }
952: if (flopSpeedMx<0.01) {
953: /* NO flops: don't print */
954: } else {
955: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
956: }
958: if (msgSpeedMx<0.01) {
959: /* NO msgs: don't print */
960: } else {
961: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
962: }
964: if (reductSpeedMx<0.01) {
965: /* NO reductions: don't print */
966: } else {
967: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
968: }
969: }
970: return(0);
971: }
973: /* Count the number of times the parent event was called */
975: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
976: {
977: if (tree[i].depth<=1) {
978: return 1; /* Main event: only once */
979: } else if (!tree[i].own) {
980: return 1; /* This event didn't happen in this process, but did in another */
981: } else {
982: int iParent;
983: for (iParent=i-1; iParent>=0; iParent--) {
984: if (tree[iParent].depth == tree[i].depth-1) break;
985: }
986: if (tree[iParent].depth != tree[i].depth-1) {
987: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
988: return -2;
989: } else {
990: PetscLogEvent dftEvent = tree[iParent].dftEvent;
991: return eventPerfInfo[dftEvent].count;
992: }
993: }
994: }
996: typedef struct {
997: int id;
998: PetscLogDouble val;
999: } PetscSortItem;
1001: static int compareSortItems(const void *item1_, const void *item2_)
1002: {
1003: PetscSortItem *item1 = (PetscSortItem *) item1_;
1004: PetscSortItem *item2 = (PetscSortItem *) item2_;
1005: if (item1->val > item2->val) return -1;
1006: if (item1->val < item2->val) return +1;
1007: return 0;
1008: }
1010: static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, int iStart, PetscLogDouble totalTime)
1011: {
1012: int depth = tree[iStart].depth;
1013: const char *name;
1014: int parentCount, nChildren;
1015: PetscSortItem *children;
1016: PetscErrorCode ierr;
1017: const int stage = MAINSTAGE;
1018: PetscStageLog stageLog;
1019: PetscEventRegInfo *eventRegInfo;
1020: PetscEventPerfInfo *eventPerfInfo;
1021: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
1022: PetscLogDouble countsPerCall;
1023: PetscBool wasPrinted;
1024: PetscBool childWasPrinted;
1025: MPI_Comm comm;
1027: /* Look up the name of the event and its PerfInfo */
1028: PetscLogGetStageLog(&stageLog);
1029: eventRegInfo = stageLog->eventLog->eventInfo;
1030: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1031: name = eventRegInfo[(PetscLogEvent)tree[iStart].nstEvent].name;
1033: PetscObjectGetComm((PetscObject)viewer,&comm);
1035: /* Count the number of child processes */
1036: nChildren = 0;
1037: {
1038: int i;
1039: for (i=iStart+1; i<nTimers; i++) {
1040: if (tree[i].depth <= depth) break;
1041: if (tree[i].depth == depth + 1) nChildren++;
1042: }
1043: }
1045: if (nChildren>0) {
1046: /* Create an array for the id-s and maxTimes of the children,
1047: * leaving 2 spaces for self-time and other-time */
1048: int i;
1049: PetscLogDouble *times, *maxTimes;
1051: PetscMalloc1(nChildren+2,&children);
1052: nChildren = 0;
1053: for (i=iStart+1; i<nTimers; i++) {
1054: if (tree[i].depth<=depth) break;
1055: if (tree[i].depth == depth + 1) {
1056: children[nChildren].id = i;
1057: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1058: nChildren++;
1059: }
1060: }
1062: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
1063: PetscMalloc1(nChildren,×);
1064: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1066: PetscMalloc1(nChildren,&maxTimes);
1067: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1068: PetscFree(times);
1070: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1071: PetscFree(maxTimes);
1072: }
1074: if (!tree[iStart].own) {
1075: /* Set values for a timer that was not activated in this process
1076: * (but was, in other processes of this run) */
1077: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1079: selfPerfInfo = myPerfInfo;
1080: otherPerfInfo = myPerfInfo;
1082: parentCount = 1;
1083: countsPerCall = 0;
1084: } else {
1085: /* Set the values for a timer that was activated in this process */
1086: int i;
1087: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1089: parentCount = countParents( tree, eventPerfInfo, iStart);
1090: myPerfInfo = eventPerfInfo[dftEvent];
1091: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1093: selfPerfInfo = myPerfInfo;
1094: otherPerfInfo.time = 0;
1095: otherPerfInfo.flops = 0;
1096: otherPerfInfo.numMessages = 0;
1097: otherPerfInfo.messageLength = 0;
1098: otherPerfInfo.numReductions = 0;
1100: for (i=0; i<nChildren; i++) {
1101: /* For all child counters: subtract the child values from self-timers */
1103: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1104: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1106: selfPerfInfo.time -= childPerfInfo.time;
1107: selfPerfInfo.flops -= childPerfInfo.flops;
1108: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1109: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1110: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1112: if ((children[i].val/totalTime) < THRESHOLD) {
1113: /* Add them to 'other' if the time is ignored in the output */
1114: otherPerfInfo.time += childPerfInfo.time;
1115: otherPerfInfo.flops += childPerfInfo.flops;
1116: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1117: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1118: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1119: }
1120: }
1121: }
1123: /* Main output for this timer */
1124: PetscLogNestedTreePrintLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1126: /* Now print the lines for the children */
1127: if (nChildren > 0) {
1128: /* Calculate max-times for 'self' and 'other' */
1129: int i;
1130: PetscLogDouble times[2], maxTimes[2];
1131: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1132: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1133: children[nChildren+0].id = -1;
1134: children[nChildren+0].val = maxTimes[0];
1135: children[nChildren+1].id = -2;
1136: children[nChildren+1].val = maxTimes[1];
1138: /* Now sort the children (including 'self' and 'other') on total time */
1139: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1141: /* Print (or ignore) the children in ascending order of total time */
1142: PetscViewerXMLStartSection(viewer,"events", NULL);
1143: for (i=0; i<nChildren+2; i++) {
1144: if ((children[i].val/totalTime) < THRESHOLD) {
1145: /* ignored: no output */
1146: } else if (children[i].id==-1) {
1147: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1148: if (childWasPrinted) {
1149: PetscViewerXMLEndSection(viewer,"event");
1150: }
1151: } else if (children[i].id==-2) {
1152: size_t len;
1153: char *otherName;
1155: PetscStrlen(name,&len);
1156: PetscMalloc1(len+16,&otherName);
1157: PetscSNPrintf(otherName,len+16,"%s: other-timed",name);
1158: PetscLogNestedTreePrintLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1159: PetscFree(otherName);
1160: if (childWasPrinted) {
1161: PetscViewerXMLEndSection(viewer,"event");
1162: }
1163: } else {
1164: /* Print the child with a recursive call to this function */
1165: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1166: }
1167: }
1168: PetscViewerXMLEndSection(viewer,"events");
1169: PetscFree(children);
1170: }
1172: if (wasPrinted) {
1173: PetscViewerXMLEndSection(viewer, "event");
1174: }
1175: return 0;
1176: }
1178: static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, PetscLogDouble totalTime)
1179: {
1180: int i, nChildren;
1181: PetscSortItem *children;
1182: PetscErrorCode ierr;
1183: const int stage = MAINSTAGE;
1184: PetscStageLog stageLog;
1185: PetscEventPerfInfo *eventPerfInfo;
1186: MPI_Comm comm;
1189: PetscObjectGetComm((PetscObject)viewer,&comm);
1191: /* Look up the PerfInfo */
1192: PetscLogGetStageLog(&stageLog);
1193: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1195: /* Count the number of child processes, and count total time */
1196: nChildren = 0;
1197: for (i=0; i<nTimers; i++)
1198: if (tree[i].depth==1) nChildren++;
1200: if (nChildren>0) {
1201: /* Create an array for the id-s and maxTimes of the children,
1202: * leaving 2 spaces for self-time and other-time */
1203: PetscLogDouble *times, *maxTimes;
1205: PetscMalloc1(nChildren,&children);
1206: nChildren = 0;
1207: for (i=0; i<nTimers; i++) {
1208: if (tree[i].depth == 1) {
1209: children[nChildren].id = i;
1210: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1211: nChildren++;
1212: }
1213: }
1215: /* Calculate the children's maximum times, to sort them */
1216: PetscMalloc1(nChildren,×);
1217: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1219: PetscMalloc1(nChildren,&maxTimes);
1220: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1221: PetscFree(times);
1223: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1224: PetscFree(maxTimes);
1226: /* Now sort the children on total time */
1227: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1228: /* Print (or ignore) the children in ascending order of total time */
1229: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1230: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1231: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, thresholdTime, "%f");
1233: for (i=0; i<nChildren; i++) {
1234: if ((children[i].val/totalTime) < THRESHOLD) {
1235: /* ignored: no output */
1236: } else {
1237: /* Print the child with a recursive call to this function */
1238: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1239: }
1240: }
1241: PetscViewerXMLEndSection(viewer, "timertree");
1242: PetscFree(children);
1243: }
1244: return(0);
1245: }
1247: typedef struct {
1248: char *name;
1249: PetscLogDouble time;
1250: PetscLogDouble flops;
1251: PetscLogDouble numMessages;
1252: PetscLogDouble messageLength;
1253: PetscLogDouble numReductions;
1254: } PetscSelfTimer;
1256: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1257: {
1258: PetscErrorCode ierr;
1259: const int stage = MAINSTAGE;
1260: PetscStageLog stageLog;
1261: PetscEventRegInfo *eventRegInfo;
1262: PetscEventPerfInfo *eventPerfInfo;
1263: PetscSelfTimer *selftimes;
1264: PetscSelfTimer *totaltimes;
1265: NestedEventId *nstEvents;
1266: int i, j, maxDefaultTimer;
1267: NestedEventId nst;
1268: PetscLogEvent dft;
1269: int nstMax, nstMax_local;
1270: MPI_Comm comm;
1273: PetscObjectGetComm((PetscObject)viewer,&comm);
1274: PetscLogGetStageLog(&stageLog);
1275: eventRegInfo = stageLog->eventLog->eventInfo;
1276: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1278: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1279: maxDefaultTimer =0;
1280: for (i=0; i<nNestedEvents; i++) {
1281: int nParents = nestedEvents[i].nParents;
1282: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1283: for (j=0; j<nParents; j++) maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1284: }
1285: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1286: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1287: for (i=0; i<nNestedEvents; i++) {
1288: int nParents = nestedEvents[i].nParents;
1289: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1290: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1291: for (j=0; j<nParents; j++) nstEvents[dftEvents[j]] = nstEvent;
1292: }
1294: /* Calculate largest nested event-ID */
1295: nstMax_local = 0;
1296: for (i=0; i<nNestedEvents; i++) nstMax_local = PetscMax(nestedEvents[i].nstEvent,nstMax_local);
1297: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1300: /* Initialize all total-times with zero */
1301: PetscMalloc1(nstMax+1,&selftimes);
1302: PetscMalloc1(nstMax+1,&totaltimes);
1303: for (nst=0; nst<=nstMax; nst++) {
1304: totaltimes[nst].time = 0;
1305: totaltimes[nst].flops = 0;
1306: totaltimes[nst].numMessages = 0;
1307: totaltimes[nst].messageLength = 0;
1308: totaltimes[nst].numReductions = 0;
1309: totaltimes[nst].name = NULL;
1310: }
1312: /* Calculate total-times */
1313: for (i=0; i<nNestedEvents; i++) {
1314: const int nParents = nestedEvents[i].nParents;
1315: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1316: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1317: for (j=0; j<nParents; j++) {
1318: const PetscLogEvent dftEvent = dftEvents[j];
1319: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1320: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1321: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1322: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1323: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1324: }
1325: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1326: }
1328: /* Initialize: self-times := totaltimes */
1329: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1331: /* Subtract timed subprocesses from self-times */
1332: for (i=0; i<nNestedEvents; i++) {
1333: const int nParents = nestedEvents[i].nParents;
1334: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1335: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1336: for (j=0; j<nParents; j++) {
1337: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1338: const PetscLogEvent dftEvent = dftEvents[j];
1339: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1340: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1341: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1342: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1343: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1344: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1345: }
1346: }
1347: }
1349: PetscFree(nstEvents);
1350: PetscFree(totaltimes);
1352: /* Set outputs */
1353: *p_self = selftimes;
1354: *p_nstMax = nstMax;
1355: return(0);
1356: }
1358: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1359: {
1360: PetscErrorCode ierr;
1361: int i;
1362: NestedEventId nst;
1363: PetscSortItem *sortSelfTimes;
1364: PetscLogDouble *times, *maxTimes;
1365: PetscStageLog stageLog;
1366: PetscEventRegInfo *eventRegInfo;
1367: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1368: PetscBool wasPrinted;
1369: MPI_Comm comm;
1372: PetscObjectGetComm((PetscObject)viewer,&comm);
1373: PetscLogGetStageLog(&stageLog);
1374: eventRegInfo = stageLog->eventLog->eventInfo;
1376: PetscMalloc1(nstMax+1,×);
1377: PetscMalloc1(nstMax+1,&maxTimes);
1378: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1379: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1380: PetscFree(times);
1382: PetscMalloc1(nstMax+1,&sortSelfTimes);
1384: /* Sort the self-timers on basis of the largest time needed */
1385: for (nst=0; nst<=nstMax; nst++) {
1386: sortSelfTimes[nst].id = nst;
1387: sortSelfTimes[nst].val = maxTimes[nst];
1388: }
1389: PetscFree(maxTimes);
1390: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1392: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1393: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1395: for (i=0; i<=nstMax; i++) {
1396: if ((sortSelfTimes[i].val/totalTime) >= THRESHOLD) {
1397: NestedEventId nstEvent = sortSelfTimes[i].id;
1398: const char *name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1399: PetscEventPerfInfo selfPerfInfo;
1401: selfPerfInfo.time = selftimes[nstEvent].time ;
1402: selfPerfInfo.flops = selftimes[nstEvent].flops;
1403: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1404: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1405: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1407: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1408: if (wasPrinted){
1409: PetscViewerXMLEndSection(viewer, "event");
1410: }
1411: }
1412: }
1413: PetscViewerXMLEndSection(viewer, "selftimertable");
1414: PetscFree(sortSelfTimes);
1415: return(0);
1416: }
1418: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1419: {
1420: PetscErrorCode ierr;
1421: PetscLogDouble locTotalTime, globTotalTime;
1422: PetscNestedEventTree *tree = NULL;
1423: PetscSelfTimer *selftimers = NULL;
1424: int nTimers = 0, nstMax = 0;
1425: MPI_Comm comm;
1428: PetscObjectGetComm((PetscObject)viewer,&comm);
1429: PetscViewerInitASCII_XML(viewer);
1430: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1431: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1433: /* Get the total elapsed time, local and global maximum */
1434: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1435: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1437: /* Print global information about this run */
1438: PetscPrintExeSpecs(viewer);
1439: PetscPrintGlobalPerformance(viewer, locTotalTime);
1441: /* Collect nested timer tree info from all processes */
1442: PetscLogNestedTreeCreate(viewer, &tree, &nTimers);
1443: PetscLogNestedTreePrintTop(viewer, tree, nTimers, globTotalTime);
1444: PetscLogNestedTreeDestroy(tree, nTimers);
1446: /* Calculate self-time for all (not-nested) events */
1447: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1448: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1449: PetscFree(selftimers);
1451: PetscViewerXMLEndSection(viewer, "petscroot");
1452: PetscViewerFinalASCII_XML(viewer);
1453: return(0);
1454: }
1456: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1457: {
1460: #endif
1463: petsc_send_ct++;
1465: PetscMPITypeSize(&petsc_send_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1466: #endif
1467: return(0);
1468: }
1470: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1471: {
1474: #endif
1477: petsc_recv_ct++;
1479: PetscMPITypeSize(&petsc_recv_len,count, MPI_Type_f2c((MPI_Fint) datatype));
1480: #endif
1481: return(0);
1482: }
1484: PETSC_EXTERN PetscErrorCode PetscAReduce()
1485: {
1487: petsc_allreduce_ct++;
1488: return(0);
1489: }
1491: #endif