Actual source code: xmllogevent.c
petsc-3.12.5 2020-03-29
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include "../src/sys/logging/xmlviewer.h"
14: #if defined(PETSC_USE_LOG)
16: /*
17: * Support for nested PetscTimers
18: *
19: * PetscTimers keep track of a lot of useful information: Wall clock times,
20: * message passing statistics, flop counts. Information about the nested structure
21: * of the timers is lost. Example:
22: *
23: * 7:30 Start: awake
24: * 7:30 Start: morning routine
25: * 7:40 Start: eat
26: * 7:49 Done: eat
27: * 7:43 Done: morning routine
28: * 8:15 Start: work
29: * 12:15 Start: eat
30: * 12:45 Done: eat
31: * 16:00 Done: work
32: * 16:30 Start: evening routine
33: * 18:30 Start: eat
34: * 19:15 Done: eat
35: * 22:00 Done: evening routine
36: * 22:00 Done: awake
37: *
38: * Petsc timers provide the following timer results:
39: *
40: * awake: 1 call 14:30 hours
41: * morning routine: 1 call 0:13 hours
42: * eat: 3 calls 1:24 hours
43: * work: 1 call 7:45 hours
44: * evening routine 1 call 5:30 hours
45: *
46: * Nested timers can be used to get the following table:
47: *
48: * [1 call]: awake 14:30 hours
49: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
50: * [1 call]: eat 0:09 hours (69 % of morning routine)
51: * rest (morning routine) 0:04 hours (31 % of morning routine)
52: * [1 call]: work 7:45 hours (53 % of awake)
53: * [1 call]: eat 0:30 hours ( 6 % of work)
54: * rest (work) 7:15 hours (94 % of work)
55: * [1 call]: evening routine 5:30 hours (38 % of awake)
56: * [1 call]: eat 0:45 hours (14 % of evening routine)
57: * rest (evening routine) 4:45 hours (86 % of morning routine)
58: *
59: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
60: * the nested timers make the stages unnecessary.
61: *
62: */
64: /*
65: * Data structures for keeping track of nested timers:
66: *
67: * nestedEvents: information about the timers that have actually been activated
68: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
69: *
70: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
71: * (one or more) default timers, where one of the default timers has the same event-id as the
72: * nested one.
73: *
74: * Because of the risk of confusion between nested timer ids and default timer ids, we
75: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
76: * only for default events. Also, all nested event variables are prepended with 'nst', and
77: * default timers with 'dft'.
78: */
80: #define DFT_ID_AWAKE -1
82: typedef PetscLogEvent NestedEventId;
83: typedef struct {
84: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
85: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
86: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
87: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
89: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
90: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
91: } PetscNestedEvent;
93: static PetscLogEvent dftParentActive = DFT_ID_AWAKE;
94: static int nNestedEvents = 0;
95: static int nNestedEventsAllocated = 0;
96: static PetscNestedEvent *nestedEvents = NULL;
97: static PetscLogDouble thresholdTime = 0.01; /* initial value was 0.1 */
99: #define THRESHOLD (thresholdTime/100.0+1e-12)
101: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
102: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
103: PETSC_INTERN PetscErrorCode PetscLogView_Nested(PetscViewer);
106: /*@C
107: PetscLogNestedBegin - Turns on nested logging of objects and events. This logs flop
108: rates and object creation and should not slow programs down too much.
110: Logically Collective over PETSC_COMM_WORLD
112: Options Database Keys:
113: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
115: Usage:
116: .vb
117: PetscInitialize(...);
118: PetscLogNestedBegin();
119: ... code ...
120: PetscLogView(viewer);
121: PetscFinalize();
122: .ve
124: Level: advanced
126: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin()
127: @*/
128: PetscErrorCode PetscLogNestedBegin(void)
129: {
133: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
135: nNestedEventsAllocated = 10;
136: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
137: dftParentActive = DFT_ID_AWAKE;
138: nNestedEvents =1;
140: /* 'Awake' is nested event 0. It has no parents */
141: nestedEvents[0].nstEvent = 0;
142: nestedEvents[0].nParents = 0;
143: nestedEvents[0].dftParentsSorted = NULL;
144: nestedEvents[0].dftEvents = NULL;
145: nestedEvents[0].dftParents = NULL;
146: nestedEvents[0].dftEventsSorted = NULL;
148: PetscLogSet(PetscLogEventBeginNested,PetscLogEventEndNested);
149: return(0);
150: }
152: /* Delete the data structures for the nested timers */
153: PetscErrorCode PetscLogNestedEnd(void)
154: {
156: int i;
159: if (!nestedEvents) return(0);
160: for (i=0; i<nNestedEvents; i++) {
161: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
162: }
163: PetscFree(nestedEvents);
164: nestedEvents = NULL;
165: nNestedEvents = 0;
166: nNestedEventsAllocated = 0;
167: return(0);
168: }
171: /*
172: UTILITIES: FIND STUFF IN SORTED ARRAYS
174: dftIndex - index to be found
175: dftArray - sorted array of PetscLogEvent-ids
176: narray - dimension of dftArray
177: entry - entry in the array where dftIndex may be found;
179: if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
180: In that case, the dftIndex can be inserted at this entry.
181: */
182: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex,const PetscLogEvent *dftArray,int narray,int *entry)
183: {
185: if (narray==0 || dftIndex <= dftArray[0]) {
186: *entry = 0;
187: } else if (dftIndex > dftArray[narray-1]) {
188: *entry = narray;
189: } else {
190: int ihigh = narray-1, ilow=0;
191: while (ihigh>ilow) {
192: const int imiddle = (ihigh+ilow)/2;
193: if (dftArray[imiddle] > dftIndex) {
194: ihigh = imiddle;
195: } else if (dftArray[imiddle]<dftIndex) {
196: ilow = imiddle+1;
197: } else {
198: ihigh = imiddle;
199: ilow = imiddle;
200: }
201: }
202: *entry = ihigh;
203: }
204: return(0);
205: }
207: /*
208: Utility: find the nested event with given identification
210: nstEvent - Nested event to be found
211: entry - entry in the nestedEvents where nstEvent may be found;
213: if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray
214: */
215: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent,int *entry)
216: {
218: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
219: *entry = 0;
220: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
221: *entry = nNestedEvents;
222: } else {
223: int ihigh = nNestedEvents-1, ilow = 0;
224: while (ihigh>ilow) {
225: const int imiddle = (ihigh+ilow)/2;
226: if (nestedEvents[imiddle].nstEvent > nstEvent) {
227: ihigh = imiddle;
228: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
229: ilow = imiddle+1;
230: } else {
231: ihigh = imiddle;
232: ilow = imiddle;
233: }
234: }
235: *entry = ihigh;
236: }
237: return(0);
238: }
240: /*
241: Nested logging is not prepared yet to support user-defined logging stages, so for now we force logging on the main stage.
242: Using PetscLogStage{Push/Pop}() would be more appropriate, but these two calls do extra bookkeeping work we don't need.
243: */
245: #define MAINSTAGE 0
247: static PetscLogStage savedStage = 0;
249: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageOverride(void)
250: {
251: PetscStageLog stageLog = petsc_stageLog;
255: if (stageLog->curStage == MAINSTAGE) return(0);
256: savedStage = stageLog->curStage;
257: stageLog->curStage = MAINSTAGE;
258: PetscIntStackPush(stageLog->stack, MAINSTAGE);
259: return(0);
260: }
262: PETSC_STATIC_INLINE PetscErrorCode PetscLogStageRestore(void)
263: {
264: PetscStageLog stageLog = petsc_stageLog;
268: if (savedStage == MAINSTAGE) return(0);
269: stageLog->curStage = savedStage;
270: PetscIntStackPop(stageLog->stack, &savedStage);
271: return(0);
272: }
274: /******************************************************************************************/
275: /* Start a nested event */
276: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
277: {
278: PetscErrorCode ierr;
279: int entry, pentry, tentry,i;
280: PetscLogEvent dftEvent;
283: PetscLogEventFindNestedTimer(nstEvent, &entry);
284: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
285: /* Nested event doesn't exist yet: create it */
287: if (nNestedEvents==nNestedEventsAllocated) {
288: /* Enlarge and re-allocate nestedEvents if needed */
289: PetscNestedEvent *tmp = nestedEvents;
290: PetscMalloc1(2*nNestedEvents,&nestedEvents);
291: nNestedEventsAllocated*=2;
292: PetscArraycpy(nestedEvents, tmp, nNestedEvents);
293: PetscFree(tmp);
294: }
296: /* Clear space in nestedEvents for new nested event */
297: nNestedEvents++;
298: for (i = nNestedEvents-1; i>entry; i--) {
299: nestedEvents[i] = nestedEvents[i-1];
300: }
302: /* Create event in nestedEvents */
303: nestedEvents[entry].nstEvent = nstEvent;
304: nestedEvents[entry].nParents=1;
305: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
307: /* Fill in new event */
308: pentry = 0;
309: dftEvent = (PetscLogEvent) nstEvent;
311: nestedEvents[entry].nstEvent = nstEvent;
312: nestedEvents[entry].dftParents[pentry] = dftParentActive;
313: nestedEvents[entry].dftEvents[pentry] = dftEvent;
314: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
315: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
317: } else {
318: /* Nested event exists: find current dftParentActive among parents */
319: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
320: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
321: int nParents = nestedEvents[entry].nParents;
323: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
325: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
326: /* dftParentActive not in the list: add it to the list */
327: int i;
328: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
329: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
330: char name[100];
332: /* Register a new default timer */
333: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
334: PetscLogEventRegister(name, 0, &dftEvent);
335: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
337: /* Reallocate parents and dftEvents to make space for new parent */
338: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
339: PetscArraycpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents);
340: PetscArraycpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents);
341: PetscArraycpy(nestedEvents[entry].dftParents, dftParents, nParents);
342: PetscArraycpy(nestedEvents[entry].dftEvents, dftEvents, nParents);
343: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
345: dftParents = nestedEvents[entry].dftParents;
346: dftEvents = nestedEvents[entry].dftEvents;
347: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
348: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
350: nestedEvents[entry].nParents++;
351: nParents++;
353: for (i = nParents-1; i>pentry; i--) {
354: dftParentsSorted[i] = dftParentsSorted[i-1];
355: dftEvents[i] = dftEvents[i-1];
356: }
357: for (i = nParents-1; i>tentry; i--) {
358: dftParents[i] = dftParents[i-1];
359: dftEventsSorted[i] = dftEventsSorted[i-1];
360: }
362: /* Fill in the new default timer */
363: dftParentsSorted[pentry] = dftParentActive;
364: dftEvents[pentry] = dftEvent;
365: dftParents[tentry] = dftParentActive;
366: dftEventsSorted[tentry] = dftEvent;
368: } else {
369: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
370: dftEvent = nestedEvents[entry].dftEvents[pentry];
371: }
372: }
374: /* Start the default 'dftEvent'-timer and update the dftParentActive */
375: PetscLogStageOverride();
376: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
377: PetscLogStageRestore();
378: dftParentActive = dftEvent;
379: return(0);
380: }
382: /* End a nested event */
383: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
384: {
385: PetscErrorCode ierr;
386: int entry, pentry, nParents;
387: PetscLogEvent *dftEventsSorted;
390: /* Find the nested event */
391: PetscLogEventFindNestedTimer(nstEvent, &entry);
392: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
393: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
394: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
395: nParents = nestedEvents[entry].nParents;
397: /* Find the current default timer among the 'dftEvents' of this event */
398: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
400: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
401: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
403: /* Stop the default timer and update the dftParentActive */
404: PetscLogStageOverride();
405: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
406: PetscLogStageRestore();
407: dftParentActive = nestedEvents[entry].dftParents[pentry];
408: return(0);
409: }
411: /*@
412: PetscLogSetThreshold - Set the threshold time for logging the events; this is a percentage out of 100, so 1. means any event
413: that takes 1 or more percent of the time.
415: Logically Collective over PETSC_COMM_WORLD
417: Input Parameter:
418: . newThresh - the threshold to use
420: Output Parameter:
421: . oldThresh - the previously set threshold value
423: Options Database Keys:
424: . -log_view :filename.xml:ascii_xml - Prints an XML summary of flop and timing information to the file
426: Usage:
427: .vb
428: PetscInitialize(...);
429: PetscLogNestedBegin();
430: PetscLogSetThreshold(0.1,&oldthresh);
431: ... code ...
432: PetscLogView(viewer);
433: PetscFinalize();
434: .ve
436: Level: advanced
438: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin(), PetscLogDefaultBegin(),
439: PetscLogNestedBegin()
440: @*/
441: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
442: {
444: if (oldThresh) *oldThresh = thresholdTime;
445: if (newThresh == PETSC_DECIDE) newThresh = 0.01;
446: if (newThresh == PETSC_DEFAULT) newThresh = 0.01;
447: thresholdTime = PetscMax(newThresh, 0.0);
448: return(0);
449: }
451: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
452: {
453: PetscErrorCode ierr;
454: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
455: char version[256], buildoptions[128] = "";
456: PetscMPIInt size;
457: size_t len;
460: MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);
461: PetscGetArchType(arch,sizeof(arch));
462: PetscGetHostName(hostname,sizeof(hostname));
463: PetscGetUserName(username,sizeof(username));
464: PetscGetProgramName(pname,sizeof(pname));
465: PetscGetDate(date,sizeof(date));
466: PetscGetVersion(version,sizeof(version));
468: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
469: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
470: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
471: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
472: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
473: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
474: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
475: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
477: #if defined(PETSC_USE_DEBUG)
478: PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions));
479: #endif
480: #if defined(PETSC_USE_COMPLEX)
481: PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions));
482: #endif
483: #if defined(PETSC_USE_REAL_SINGLE)
484: PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions));
485: #elif defined(PETSC_USE_REAL___FLOAT128)
486: PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions));
487: #elif defined(PETSC_USE_REAL___FP16)
488: PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions));
489: #endif
490: #if defined(PETSC_USE_64BIT_INDICES)
491: PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions));
492: #endif
493: #if defined(__cplusplus)
494: PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions));
495: #endif
496: PetscStrlen(buildoptions,&len);
497: if (len) {
498: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
499: }
500: PetscViewerXMLEndSection(viewer, "runspecification");
501: return(0);
502: }
504: /* Print the global performance: max, max/min, average and total of
505: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
506: */
507: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total)
508: {
509: PetscErrorCode ierr;
510: PetscLogDouble min, tot, ratio, avg;
511: MPI_Comm comm;
512: PetscMPIInt rank, size;
513: PetscLogDouble valrank[2], max[2];
516: PetscObjectGetComm((PetscObject)viewer,&comm);
517: MPI_Comm_size(PetscObjectComm((PetscObject)viewer),&size);
518: MPI_Comm_rank(comm, &rank);
520: valrank[0] = local_val;
521: valrank[1] = (PetscLogDouble) rank;
522: MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
523: MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm);
524: MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
525: avg = tot/((PetscLogDouble) size);
526: if (min != 0.0) ratio = max[0]/min;
527: else ratio = 0.0;
529: PetscViewerXMLStartSection(viewer, name, desc);
530: PetscViewerXMLPutDouble(viewer, "max", NULL, max[0], "%e");
531: PetscViewerXMLPutInt( viewer, "maxrank" , "rank at which max was found" , (PetscMPIInt) max[1] );
532: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
533: if (print_average) {
534: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
535: }
536: if (print_total) {
537: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
538: }
539: PetscViewerXMLEndSection(viewer, name);
540: return(0);
541: }
543: /* Print the global performance: max, max/min, average and total of
544: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
545: */
546: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
547: {
548: PetscErrorCode ierr;
549: PetscLogDouble flops, mem, red, mess;
550: const PetscBool print_total_yes = PETSC_TRUE,
551: print_total_no = PETSC_FALSE,
552: print_average_no = PETSC_FALSE,
553: print_average_yes = PETSC_TRUE;
556: /* Must preserve reduction count before we go on */
557: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
559: /* Calculate summary information */
560: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
562: /* Time */
563: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no);
565: /* Objects */
566: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble) petsc_numObjects, print_average_yes, print_total_no);
568: /* Flop */
569: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops/1.0E6, print_average_yes, print_total_yes);
571: /* Flop/sec -- Must talk to Barry here */
572: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
573: else flops = 0.0;
574: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops/1.0E6, print_average_yes, print_total_yes);
576: /* Memory */
577: PetscMallocGetMaximumUsage(&mem);
578: if (mem > 0.0) {
579: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem/1024.0/1024.0, print_average_yes, print_total_yes);
580: }
581: /* Messages */
582: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
583: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes);
585: /* Message Volume */
586: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
587: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess/1024.0/1024.0, print_average_yes, print_total_yes);
589: /* Reductions */
590: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red , print_average_no, print_total_no);
591: PetscViewerXMLEndSection(viewer, "globalperformance");
592: return(0);
593: }
595: typedef struct {
596: PetscLogEvent dftEvent;
597: NestedEventId nstEvent;
598: PetscLogEvent dftParent;
599: NestedEventId nstParent;
600: PetscBool own;
601: int depth;
602: NestedEventId* nstPath;
603: } PetscNestedEventTree;
605: /* Compare timers to sort them in the tree */
606: static int compareTreeItems(const void *item1_, const void *item2_)
607: {
608: int i;
609: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
610: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
612: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
613: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
614: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
615: }
616: if (item1->depth < item2->depth) return -1;
617: if (item1->depth > item2->depth) return 1;
618: return 0;
619: }
620: /*
621: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
622: * calls that happen in some processes, but not in others.
623: *
624: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
625: * The tree is sorted so that the timers can be printed in the order of appearance.
626: *
627: * For tree-items which appear in the trees of multiple processes (which will be most items), the
628: * following rule is followed:
629: * + if information from my own process is available, then that is the information stored in tree.
630: * otherwise it is some other process's information.
631: */
632: static PetscErrorCode PetscLogNestedTreeCreate(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
633: {
634: PetscNestedEventTree *tree = NULL, *newTree;
635: int *treeIndices;
636: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
637: int yesno;
638: PetscBool done;
639: PetscErrorCode ierr;
640: int maxdepth;
641: int depth;
642: int illegalEvent;
643: int iextra;
644: NestedEventId *nstPath, *nstMyPath;
645: MPI_Comm comm;
648: PetscObjectGetComm((PetscObject)viewer,&comm);
650: /* Calculate memory needed to store everybody's information and allocate tree */
651: nTimers = 0;
652: for (i=0; i<nNestedEvents; i++) nTimers += nestedEvents[i].nParents;
654: PetscMalloc1(nTimers,&tree);
656: /* Fill tree with readily available information */
657: iTimer0 = 0;
658: maxDefaultTimer =0;
659: for (i=0; i<nNestedEvents; i++) {
660: int nParents = nestedEvents[i].nParents;
661: NestedEventId nstEvent = nestedEvents[i].nstEvent;
662: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
663: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
664: for (j=0; j<nParents; j++) {
665: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
667: tree[iTimer0+j].dftEvent = dftEvents[j];
668: tree[iTimer0+j].nstEvent = nstEvent;
669: tree[iTimer0+j].dftParent = dftParentsSorted[j];
670: tree[iTimer0+j].own = PETSC_TRUE;
672: tree[iTimer0+j].nstParent = 0;
673: tree[iTimer0+j].depth = 0;
674: tree[iTimer0+j].nstPath = NULL;
675: }
676: iTimer0 += nParents;
677: }
679: /* Calculate the global maximum for the default timer index, so array treeIndices can
680: * be allocated only once */
681: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
682: maxDefaultTimer = j;
684: /* Find default timer's place in the tree */
685: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
686: treeIndices[0] = 0;
687: for (i=0; i<nTimers; i++) {
688: PetscLogEvent dftEvent = tree[i].dftEvent;
689: treeIndices[dftEvent] = i;
690: }
692: /* Find each dftParent's nested identification */
693: for (i=0; i<nTimers; i++) {
694: PetscLogEvent dftParent = tree[i].dftParent;
695: if (dftParent!= DFT_ID_AWAKE) {
696: int j = treeIndices[dftParent];
697: tree[i].nstParent = tree[j].nstEvent;
698: }
699: }
701: /* Find depths for each timer path */
702: done = PETSC_FALSE;
703: maxdepth = 0;
704: while (!done) {
705: done = PETSC_TRUE;
706: for (i=0; i<nTimers; i++) {
707: if (tree[i].dftParent == DFT_ID_AWAKE) {
708: tree[i].depth = 1;
709: maxdepth = PetscMax(1,maxdepth);
710: } else {
711: int j = treeIndices[tree[i].dftParent];
712: depth = 1+tree[j].depth;
713: if (depth>tree[i].depth) {
714: done = PETSC_FALSE;
715: tree[i].depth = depth;
716: maxdepth = PetscMax(depth,maxdepth);
717: }
718: }
719: }
720: }
722: /* Allocate the paths in the entire tree */
723: for (i=0; i<nTimers; i++) {
724: depth = tree[i].depth;
725: PetscCalloc1(depth,&tree[i].nstPath);
726: }
728: /* Calculate the paths for all timers */
729: for (depth=1; depth<=maxdepth; depth++) {
730: for (i=0; i<nTimers; i++) {
731: if (tree[i].depth==depth) {
732: if (depth>1) {
733: int j = treeIndices[tree[i].dftParent];
734: PetscArraycpy(tree[i].nstPath,tree[j].nstPath,depth-1);
735: }
736: tree[i].nstPath[depth-1] = tree[i].nstEvent;
737: }
738: }
739: }
740: PetscFree(treeIndices);
742: /* Sort the tree on basis of the paths */
743: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
745: /* Allocate an array to store paths */
746: depth = maxdepth;
747: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
748: PetscMalloc1(maxdepth+1, &nstPath);
749: PetscMalloc1(maxdepth+1, &nstMyPath);
751: /* Find an illegal nested event index (1+largest nested event index) */
752: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
753: i = illegalEvent;
754: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
756: /* First, detect timers which are not available in this process, but are available in others
757: * Allocate a new tree, that can contain all timers
758: * Then, fill the new tree with all (own and not-own) timers */
759: newTree= NULL;
760: for (yesno=0; yesno<=1; yesno++) {
761: depth = 1;
762: i = 0;
763: iextra = 0;
764: while (depth>0) {
765: int j;
766: PetscBool same;
768: /* Construct the next path in this process's tree:
769: * if necessary, supplement with invalid path entries */
770: depth++;
771: if (depth > maxdepth + 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth+1 %d",depth,maxdepth+1);
772: if (i<nTimers) {
773: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
774: for (j=tree[i].depth; j<depth; j++) nstMyPath[j] = illegalEvent;
775: } else {
776: for (j=0; j<depth; j++) nstMyPath[j] = illegalEvent;
777: }
779: /* Communicate with other processes to obtain the next path and its depth */
780: MPIU_Allreduce(nstMyPath, nstPath, depth, MPI_INT, MPI_MIN, comm);
781: for (j=depth-1; (int) j>=0; j--) {
782: if (nstPath[j]==illegalEvent) depth=j;
783: }
785: if (depth>0) {
786: /* If the path exists */
788: /* check whether the next path is the same as this process's next path */
789: same = PETSC_TRUE;
790: for (j=0; same && j<depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
792: if (same) {
793: /* Register 'own path' */
794: if (newTree) newTree[i+iextra] = tree[i];
795: i++;
796: } else {
797: /* Register 'not an own path' */
798: if (newTree) {
799: newTree[i+iextra].nstEvent = nstPath[depth-1];
800: newTree[i+iextra].own = PETSC_FALSE;
801: newTree[i+iextra].depth = depth;
802: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
803: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
805: newTree[i+iextra].dftEvent = 0;
806: newTree[i+iextra].dftParent = 0;
807: newTree[i+iextra].nstParent = 0;
808: }
809: iextra++;
810: }
812: }
813: }
815: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
816: totalNTimers = nTimers + iextra;
817: if (!newTree) {
818: PetscMalloc1(totalNTimers, &newTree);
819: }
820: }
821: PetscFree(nstPath);
822: PetscFree(nstMyPath);
823: PetscFree(tree);
824: tree = newTree;
825: newTree = NULL;
827: /* Set return value and return */
828: *p_tree = tree;
829: *p_nTimers = totalNTimers;
830: return(0);
831: }
833: /*
834: * Delete the nested timer tree
835: */
836: static PetscErrorCode PetscLogNestedTreeDestroy(PetscNestedEventTree *tree, int nTimers)
837: {
838: int i;
839: PetscErrorCode ierr;
842: for (i=0; i<nTimers; i++) {
843: PetscFree(tree[i].nstPath);
844: }
845: PetscFree(tree);
846: return(0);
847: }
849: /* Print the global performance: max, max/min, average and total of
850: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
851: */
852: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer,const char *name,PetscLogDouble value,PetscLogDouble minthreshold,PetscLogDouble maxthreshold,PetscLogDouble minmaxtreshold)
853: {
854: MPI_Comm comm; /* MPI communicator in reduction */
855: PetscMPIInt rank; /* rank of this process */
856: PetscLogDouble val_in[2], max[2], min[2];
857: PetscLogDouble minvalue, maxvalue, tot;
858: PetscMPIInt size;
859: PetscMPIInt minLoc, maxLoc;
863: PetscObjectGetComm((PetscObject)viewer,&comm);
864: MPI_Comm_size(comm, &size);
865: MPI_Comm_rank(comm, &rank);
866: val_in[0] = value;
867: val_in[1] = (PetscLogDouble) rank;
868: MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm);
869: MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm);
870: maxvalue = max[0];
871: maxLoc = (PetscMPIInt) max[1];
872: minvalue = min[0];
873: minLoc = (PetscMPIInt) min[1];
874: MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
876: if (maxvalue<maxthreshold && minvalue>=minthreshold) {
877: /* One call per parent or NO value: don't print */
878: } else {
879: PetscViewerXMLStartSection(viewer, name, NULL);
880: if (maxvalue>minvalue*minmaxtreshold) {
881: PetscViewerXMLPutDouble(viewer, "avgvalue", NULL, tot/size, "%g");
882: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%g");
883: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%g");
884: PetscViewerXMLPutInt( viewer, "minloc" , NULL, minLoc);
885: PetscViewerXMLPutInt( viewer, "maxloc" , NULL, maxLoc);
886: } else {
887: PetscViewerXMLPutDouble(viewer, "value", NULL, tot/size, "%g");
888: }
889: PetscViewerXMLEndSection(viewer, name);
890: }
891: return(0);
892: }
894: #define N_COMM 8
895: static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
896: {
897: PetscLogDouble time = perfInfo.time;
898: PetscLogDouble timeMx;
900: MPI_Comm comm;
903: PetscObjectGetComm((PetscObject)viewer,&comm);
904: MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
905: *isPrinted = ((timeMx/totalTime) >= THRESHOLD) ? PETSC_TRUE : PETSC_FALSE;
906: if (*isPrinted) {
907: PetscViewerXMLStartSection(viewer, "event", NULL);
908: PetscViewerXMLPutString(viewer, "name", NULL, name);
909: PetscPrintXMLNestedLinePerfResults(viewer, "time", time/totalTime*100.0, 0, 0, 1.02);
910: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount>0 ? countsPerCall : 0, 0.99, 1.01, 1.02);
911: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time>=timeMx*0.001 ? 1e-6*perfInfo.flops/time : 0, 0, 0.01, 1.05);
912: PetscPrintXMLNestedLinePerfResults(viewer, "mbps",time>=timeMx*0.001 ? perfInfo.messageLength/(1024*1024*time) : 0, 0, 0.01, 1.05);
913: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time>=timeMx*0.001 ? perfInfo.numReductions/time : 0, 0, 0.01, 1.05);
914: }
915: return(0);
916: }
918: /* Count the number of times the parent event was called */
920: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
921: {
922: if (tree[i].depth<=1) {
923: return 1; /* Main event: only once */
924: } else if (!tree[i].own) {
925: return 1; /* This event didn't happen in this process, but did in another */
926: } else {
927: int iParent;
928: for (iParent=i-1; iParent>=0; iParent--) {
929: if (tree[iParent].depth == tree[i].depth-1) break;
930: }
931: if (tree[iParent].depth != tree[i].depth-1) {
932: /* ***** Internal error: cannot find parent */
933: return -2;
934: } else {
935: PetscLogEvent dftEvent = tree[iParent].dftEvent;
936: return eventPerfInfo[dftEvent].count;
937: }
938: }
939: }
941: typedef struct {
942: int id;
943: PetscLogDouble val;
944: } PetscSortItem;
946: static int compareSortItems(const void *item1_, const void *item2_)
947: {
948: PetscSortItem *item1 = (PetscSortItem *) item1_;
949: PetscSortItem *item2 = (PetscSortItem *) item2_;
950: if (item1->val > item2->val) return -1;
951: if (item1->val < item2->val) return +1;
952: return 0;
953: }
955: static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, int iStart, PetscLogDouble totalTime)
956: {
957: int depth = tree[iStart].depth;
958: const char *name;
959: int parentCount, nChildren;
960: PetscSortItem *children;
961: PetscErrorCode ierr;
962: const int stage = MAINSTAGE;
963: PetscStageLog stageLog;
964: PetscEventRegInfo *eventRegInfo;
965: PetscEventPerfInfo *eventPerfInfo;
966: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
967: PetscLogDouble countsPerCall;
968: PetscBool wasPrinted;
969: PetscBool childWasPrinted;
970: MPI_Comm comm;
973: /* Look up the name of the event and its PerfInfo */
974: PetscLogGetStageLog(&stageLog);
975: eventRegInfo = stageLog->eventLog->eventInfo;
976: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
977: name = eventRegInfo[(PetscLogEvent)tree[iStart].nstEvent].name;
978: PetscObjectGetComm((PetscObject)viewer,&comm);
980: /* Count the number of child processes */
981: nChildren = 0;
982: {
983: int i;
984: for (i=iStart+1; i<nTimers; i++) {
985: if (tree[i].depth <= depth) break;
986: if (tree[i].depth == depth + 1) nChildren++;
987: }
988: }
990: if (nChildren>0) {
991: /* Create an array for the id-s and maxTimes of the children,
992: * leaving 2 spaces for self-time and other-time */
993: int i;
994: PetscLogDouble *times, *maxTimes;
996: PetscMalloc1(nChildren+2,&children);
997: nChildren = 0;
998: for (i=iStart+1; i<nTimers; i++) {
999: if (tree[i].depth<=depth) break;
1000: if (tree[i].depth == depth + 1) {
1001: children[nChildren].id = i;
1002: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1003: nChildren++;
1004: }
1005: }
1007: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
1008: PetscMalloc1(nChildren,×);
1009: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1011: PetscMalloc1(nChildren,&maxTimes);
1012: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1013: PetscFree(times);
1015: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1016: PetscFree(maxTimes);
1017: }
1019: if (!tree[iStart].own) {
1020: /* Set values for a timer that was not activated in this process
1021: * (but was, in other processes of this run) */
1022: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1024: selfPerfInfo = myPerfInfo;
1025: otherPerfInfo = myPerfInfo;
1027: parentCount = 1;
1028: countsPerCall = 0;
1029: } else {
1030: /* Set the values for a timer that was activated in this process */
1031: int i;
1032: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1034: parentCount = countParents( tree, eventPerfInfo, iStart);
1035: myPerfInfo = eventPerfInfo[dftEvent];
1036: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1038: selfPerfInfo = myPerfInfo;
1039: otherPerfInfo.time = 0;
1040: otherPerfInfo.flops = 0;
1041: otherPerfInfo.numMessages = 0;
1042: otherPerfInfo.messageLength = 0;
1043: otherPerfInfo.numReductions = 0;
1045: for (i=0; i<nChildren; i++) {
1046: /* For all child counters: subtract the child values from self-timers */
1048: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1049: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1051: selfPerfInfo.time -= childPerfInfo.time;
1052: selfPerfInfo.flops -= childPerfInfo.flops;
1053: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1054: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1055: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1057: if ((children[i].val/totalTime) < THRESHOLD) {
1058: /* Add them to 'other' if the time is ignored in the output */
1059: otherPerfInfo.time += childPerfInfo.time;
1060: otherPerfInfo.flops += childPerfInfo.flops;
1061: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1062: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1063: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1064: }
1065: }
1066: }
1068: /* Main output for this timer */
1069: PetscLogNestedTreePrintLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1071: /* Now print the lines for the children */
1072: if (nChildren > 0) {
1073: /* Calculate max-times for 'self' and 'other' */
1074: int i;
1075: PetscLogDouble times[2], maxTimes[2];
1076: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1077: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1078: children[nChildren+0].id = -1;
1079: children[nChildren+0].val = maxTimes[0];
1080: children[nChildren+1].id = -2;
1081: children[nChildren+1].val = maxTimes[1];
1083: /* Now sort the children (including 'self' and 'other') on total time */
1084: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1086: /* Print (or ignore) the children in ascending order of total time */
1087: PetscViewerXMLStartSection(viewer,"events", NULL);
1088: for (i=0; i<nChildren+2; i++) {
1089: if ((children[i].val/totalTime) < THRESHOLD) {
1090: /* ignored: no output */
1091: } else if (children[i].id==-1) {
1092: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1093: if (childWasPrinted) {
1094: PetscViewerXMLEndSection(viewer,"event");
1095: }
1096: } else if (children[i].id==-2) {
1097: size_t len;
1098: char *otherName;
1100: PetscStrlen(name,&len);
1101: PetscMalloc1(len+16,&otherName);
1102: PetscSNPrintf(otherName,len+16,"%s: other-timed",name);
1103: PetscLogNestedTreePrintLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1104: PetscFree(otherName);
1105: if (childWasPrinted) {
1106: PetscViewerXMLEndSection(viewer,"event");
1107: }
1108: } else {
1109: /* Print the child with a recursive call to this function */
1110: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1111: }
1112: }
1113: PetscViewerXMLEndSection(viewer,"events");
1114: PetscFree(children);
1115: }
1117: if (wasPrinted) {
1118: PetscViewerXMLEndSection(viewer, "event");
1119: }
1120: return(0);
1121: }
1123: static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, int nTimers, PetscLogDouble totalTime)
1124: {
1125: int i, nChildren;
1126: PetscSortItem *children;
1127: PetscErrorCode ierr;
1128: const int stage = MAINSTAGE;
1129: PetscStageLog stageLog;
1130: PetscEventPerfInfo *eventPerfInfo;
1131: MPI_Comm comm;
1134: PetscObjectGetComm((PetscObject)viewer,&comm);
1136: /* Look up the PerfInfo */
1137: PetscLogGetStageLog(&stageLog);
1138: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1140: /* Count the number of child processes, and count total time */
1141: nChildren = 0;
1142: for (i=0; i<nTimers; i++)
1143: if (tree[i].depth==1) nChildren++;
1145: if (nChildren>0) {
1146: /* Create an array for the id-s and maxTimes of the children,
1147: * leaving 2 spaces for self-time and other-time */
1148: PetscLogDouble *times, *maxTimes;
1150: PetscMalloc1(nChildren,&children);
1151: nChildren = 0;
1152: for (i=0; i<nTimers; i++) {
1153: if (tree[i].depth == 1) {
1154: children[nChildren].id = i;
1155: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1156: nChildren++;
1157: }
1158: }
1160: /* Calculate the children's maximum times, to sort them */
1161: PetscMalloc1(nChildren,×);
1162: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1164: PetscMalloc1(nChildren,&maxTimes);
1165: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1166: PetscFree(times);
1168: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1169: PetscFree(maxTimes);
1171: /* Now sort the children on total time */
1172: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1173: /* Print (or ignore) the children in ascending order of total time */
1174: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1175: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1176: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, thresholdTime, "%f");
1178: for (i=0; i<nChildren; i++) {
1179: if ((children[i].val/totalTime) < THRESHOLD) {
1180: /* ignored: no output */
1181: } else {
1182: /* Print the child with a recursive call to this function */
1183: PetscLogNestedTreePrint(viewer, tree, nTimers, children[i].id, totalTime);
1184: }
1185: }
1186: PetscViewerXMLEndSection(viewer, "timertree");
1187: PetscFree(children);
1188: }
1189: return(0);
1190: }
1192: typedef struct {
1193: char *name;
1194: PetscLogDouble time;
1195: PetscLogDouble flops;
1196: PetscLogDouble numMessages;
1197: PetscLogDouble messageLength;
1198: PetscLogDouble numReductions;
1199: } PetscSelfTimer;
1201: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1202: {
1203: PetscErrorCode ierr;
1204: const int stage = MAINSTAGE;
1205: PetscStageLog stageLog;
1206: PetscEventRegInfo *eventRegInfo;
1207: PetscEventPerfInfo *eventPerfInfo;
1208: PetscSelfTimer *selftimes;
1209: PetscSelfTimer *totaltimes;
1210: NestedEventId *nstEvents;
1211: int i, j, maxDefaultTimer;
1212: NestedEventId nst;
1213: PetscLogEvent dft;
1214: int nstMax, nstMax_local;
1215: MPI_Comm comm;
1218: PetscObjectGetComm((PetscObject)viewer,&comm);
1219: PetscLogGetStageLog(&stageLog);
1220: eventRegInfo = stageLog->eventLog->eventInfo;
1221: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1223: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1224: maxDefaultTimer =0;
1225: for (i=0; i<nNestedEvents; i++) {
1226: int nParents = nestedEvents[i].nParents;
1227: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1228: for (j=0; j<nParents; j++) maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1229: }
1230: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1231: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1232: for (i=0; i<nNestedEvents; i++) {
1233: int nParents = nestedEvents[i].nParents;
1234: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1235: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1236: for (j=0; j<nParents; j++) nstEvents[dftEvents[j]] = nstEvent;
1237: }
1239: /* Calculate largest nested event-ID */
1240: nstMax_local = 0;
1241: for (i=0; i<nNestedEvents; i++) nstMax_local = PetscMax(nestedEvents[i].nstEvent,nstMax_local);
1242: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1244: /* Initialize all total-times with zero */
1245: PetscMalloc1(nstMax+1,&selftimes);
1246: PetscMalloc1(nstMax+1,&totaltimes);
1247: for (nst=0; nst<=nstMax; nst++) {
1248: totaltimes[nst].time = 0;
1249: totaltimes[nst].flops = 0;
1250: totaltimes[nst].numMessages = 0;
1251: totaltimes[nst].messageLength = 0;
1252: totaltimes[nst].numReductions = 0;
1253: totaltimes[nst].name = NULL;
1254: }
1256: /* Calculate total-times */
1257: for (i=0; i<nNestedEvents; i++) {
1258: const int nParents = nestedEvents[i].nParents;
1259: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1260: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1261: for (j=0; j<nParents; j++) {
1262: const PetscLogEvent dftEvent = dftEvents[j];
1263: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1264: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1265: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1266: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1267: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1268: }
1269: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1270: }
1272: /* Initialize: self-times := totaltimes */
1273: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1275: /* Subtract timed subprocesses from self-times */
1276: for (i=0; i<nNestedEvents; i++) {
1277: const int nParents = nestedEvents[i].nParents;
1278: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1279: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1280: for (j=0; j<nParents; j++) {
1281: if (dftParentsSorted[j] != DFT_ID_AWAKE) {
1282: const PetscLogEvent dftEvent = dftEvents[j];
1283: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1284: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1285: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1286: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1287: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1288: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1289: }
1290: }
1291: }
1293: PetscFree(nstEvents);
1294: PetscFree(totaltimes);
1296: /* Set outputs */
1297: *p_self = selftimes;
1298: *p_nstMax = nstMax;
1299: return(0);
1300: }
1302: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1303: {
1304: PetscErrorCode ierr;
1305: int i;
1306: NestedEventId nst;
1307: PetscSortItem *sortSelfTimes;
1308: PetscLogDouble *times, *maxTimes;
1309: PetscStageLog stageLog;
1310: PetscEventRegInfo *eventRegInfo;
1311: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1312: PetscBool wasPrinted;
1313: MPI_Comm comm;
1316: PetscObjectGetComm((PetscObject)viewer,&comm);
1317: PetscLogGetStageLog(&stageLog);
1318: eventRegInfo = stageLog->eventLog->eventInfo;
1320: PetscMalloc1(nstMax+1,×);
1321: PetscMalloc1(nstMax+1,&maxTimes);
1322: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1323: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1324: PetscFree(times);
1326: PetscMalloc1(nstMax+1,&sortSelfTimes);
1328: /* Sort the self-timers on basis of the largest time needed */
1329: for (nst=0; nst<=nstMax; nst++) {
1330: sortSelfTimes[nst].id = nst;
1331: sortSelfTimes[nst].val = maxTimes[nst];
1332: }
1333: PetscFree(maxTimes);
1334: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1336: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1337: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1339: for (i=0; i<=nstMax; i++) {
1340: if ((sortSelfTimes[i].val/totalTime) >= THRESHOLD) {
1341: NestedEventId nstEvent = sortSelfTimes[i].id;
1342: const char *name = eventRegInfo[(PetscLogEvent)nstEvent].name;
1343: PetscEventPerfInfo selfPerfInfo;
1345: selfPerfInfo.time = selftimes[nstEvent].time ;
1346: selfPerfInfo.flops = selftimes[nstEvent].flops;
1347: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1348: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1349: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1351: PetscLogNestedTreePrintLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1352: if (wasPrinted){
1353: PetscViewerXMLEndSection(viewer, "event");
1354: }
1355: }
1356: }
1357: PetscViewerXMLEndSection(viewer, "selftimertable");
1358: PetscFree(sortSelfTimes);
1359: return(0);
1360: }
1362: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1363: {
1364: PetscErrorCode ierr;
1365: PetscLogDouble locTotalTime, globTotalTime;
1366: PetscNestedEventTree *tree = NULL;
1367: PetscSelfTimer *selftimers = NULL;
1368: int nTimers = 0, nstMax = 0;
1369: MPI_Comm comm;
1372: PetscObjectGetComm((PetscObject)viewer,&comm);
1373: PetscViewerInitASCII_XML(viewer);
1374: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1375: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1377: /* Get the total elapsed time, local and global maximum */
1378: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1379: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1381: /* Print global information about this run */
1382: PetscPrintExeSpecs(viewer);
1383: PetscPrintGlobalPerformance(viewer, locTotalTime);
1385: /* Collect nested timer tree info from all processes */
1386: PetscLogNestedTreeCreate(viewer, &tree, &nTimers);
1387: PetscLogNestedTreePrintTop(viewer, tree, nTimers, globTotalTime);
1388: PetscLogNestedTreeDestroy(tree, nTimers);
1390: /* Calculate self-time for all (not-nested) events */
1391: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1392: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1393: PetscFree(selftimers);
1395: PetscViewerXMLEndSection(viewer, "petscroot");
1396: PetscViewerFinalASCII_XML(viewer);
1397: return(0);
1398: }
1400: PETSC_EXTERN PetscErrorCode PetscASend(int count, int datatype)
1401: {
1402: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
1404: #endif
1407: petsc_send_ct++;
1408: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
1409: PetscMPITypeSize(count,MPI_Type_f2c((MPI_Fint) datatype),&petsc_send_len);
1410: #endif
1411: return(0);
1412: }
1414: PETSC_EXTERN PetscErrorCode PetscARecv(int count, int datatype)
1415: {
1416: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
1418: #endif
1421: petsc_recv_ct++;
1422: #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
1423: PetscMPITypeSize(count,MPI_Type_f2c((MPI_Fint) datatype),&petsc_recv_len);
1424: #endif
1425: return(0);
1426: }
1428: PETSC_EXTERN PetscErrorCode PetscAReduce()
1429: {
1431: petsc_allreduce_ct++;
1432: return(0);
1433: }
1435: #endif