Actual source code: xmllogevent.c
petsc-3.7.3 2016-08-01
1: /*************************************************************************************
2: * M A R I T I M E R E S E A R C H I N S T I T U T E N E T H E R L A N D S *
3: *************************************************************************************
4: * authors: Bas van 't Hof, Koos Huijssen, Christiaan M. Klaij *
5: *************************************************************************************
6: * content: Support for nested PetscTimers *
7: *************************************************************************************/
8: #include <petsclog.h>
9: #include <petsc/private/logimpl.h>
10: #include <petsctime.h>
11: #include <petscviewer.h>
12: #include ../src/sys/logging/xmllogevent.h
13: #include ../src/sys/logging/xmlviewer.h
15: #if defined(PETSC_USE_LOG)
17: /*
18: * Support for nested PetscTimers
19: *
20: * PetscTimers keep track of a lot of useful information: Wall clock times,
21: * message passing statistics, flop counts. Information about the nested structure
22: * of the timers is lost. Example:
23: *
24: * 7:30 Start: awake
25: * 7:30 Start: morning routine
26: * 7:40 Start: eat
27: * 7:49 Done: eat
28: * 7:43 Done: morning routine
29: * 8:15 Start: work
30: * 12:15 Start: eat
31: * 12:45 Done: eat
32: * 16:00 Done: work
33: * 16:30 Start: evening routine
34: * 18:30 Start: eat
35: * 19:15 Done: eat
36: * 22:00 Done: evening routine
37: * 22:00 Done: awake
38: *
39: * Petsc timers provide the following timer results:
40: *
41: * awake: 1 call 14:30 hours
42: * morning routine: 1 call 0:13 hours
43: * eat: 3 calls 1:24 hours
44: * work: 1 call 7:45 hours
45: * evening routine 1 call 5:30 hours
46: *
47: * Nested timers can be used to get the following table:
48: *
49: * [1 call]: awake 14:30 hours
50: * [1 call]: morning routine 0:13 hours ( 2 % of awake)
51: * [1 call]: eat 0:09 hours (69 % of morning routine)
52: * rest (morning routine) 0:04 hours (31 % of morning routine)
53: * [1 call]: work 7:45 hours (53 % of awake)
54: * [1 call]: eat 0:30 hours ( 6 % of work)
55: * rest (work) 7:15 hours (94 % of work)
56: * [1 call]: evening routine 5:30 hours (38 % of awake)
57: * [1 call]: eat 0:45 hours (14 % of evening routine)
58: * rest (evening routine) 4:45 hours (86 % of morning routine)
59: *
60: * We ignore the concept of 'stages', because these seem to be conflicting notions, or at least,
61: * the nested timers make the stages unnecessary.
62: *
63: */
65: /*
66: * Data structures for keeping track of nested timers:
67: *
68: * nestedEvents: information about the timers that have actually been activated
69: * dftParentActive: if a timer is started now, it is part of (nested inside) the dftParentActive
70: *
71: * The Default-timers are used to time the nested timers. Every nested timer corresponds to
72: * (one or more) default timers, where one of the default timers has the same event-id as the
73: * nested one.
74: *
75: * Because of the risk of confusion between nested timer ids and default timer ids, we
76: * introduce a typedef for nested events (NestedEventId) and use the existing type PetscLogEvent
77: * only for default events. Also, all nested event variables are prepended with 'nst', and
78: * default timers with 'dft'.
79: */
80: typedef PetscLogEvent NestedEventId;
81: typedef struct {
82: NestedEventId nstEvent; /* event-code for this nested event, argument 'event' in PetscLogEventStartNested */
83: int nParents; /* number of 'dftParents': the default timer which was the dftParentActive when this nested timer was activated */
84: PetscLogEvent *dftParentsSorted; /* The default timers which were the dftParentActive when this nested event was started */
85: PetscLogEvent *dftEvents; /* The default timers which represent the different 'instances' of this nested event */
87: PetscLogEvent *dftParents; /* The default timers which were the dftParentActive when this nested event was started */
88: PetscLogEvent *dftEventsSorted; /* The default timers which represent the different 'instances' of this nested event */
89: } PetscNestedEvent;
91: static PetscLogEvent dftParentActive = 0;
92: static int nNestedEvents = 0;
93: static int nNestedEventsAllocated = 0;
94: static PetscNestedEvent *nestedEvents = NULL;
95: static PetscLogDouble threshTime = 0.01; /* initial value was .1 */
97: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
98: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4);
102: PetscErrorCode PetscLogNestedBegin(void)
103: {
104: PetscErrorCode ierr;
106: if (nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents already allocated");
108: nNestedEventsAllocated=10;
109: PetscMalloc1(nNestedEventsAllocated,&nestedEvents);
112: dftParentActive = 0;
113: nNestedEvents =1;
115: /* 'Awake' is nested event 0. It has no parents */
116: nestedEvents[0].nstEvent = 0;
117: nestedEvents[0].nParents = 0;
118: nestedEvents[0].dftParentsSorted = NULL;
119: nestedEvents[0].dftEvents = NULL;
120: nestedEvents[0].dftParents = NULL;
121: nestedEvents[0].dftEventsSorted = NULL;
123: PetscLogSet(PetscLogEventBeginNested, PetscLogEventEndNested);
124: return(0);
125: }
127: /* Delete the data structures for the nested timers */
130: PetscErrorCode PetscLogNestedEnd(void)
131: {
132: PetscErrorCode ierr;
133: int i;
136: if (!nestedEvents) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_COR,"nestedEvents does not exist");
138: for (i=0; i<nNestedEvents; i++) {
139: PetscFree4(nestedEvents[i].dftParentsSorted,nestedEvents[i].dftEventsSorted,nestedEvents[i].dftParents,nestedEvents[i].dftEvents);
140: }
141: PetscFree(nestedEvents);
142: nestedEvents = NULL;
143: nNestedEvents = 0;
144: nNestedEventsAllocated = 0;
145: return(0);
146: }
149: /*
150: * UTILITIES: FIND STUFF IN SORTED ARRAYS
151: *
152: * Utility: find a default timer in a sorted array */
155: static PetscErrorCode PetscLogEventFindDefaultTimer(PetscLogEvent dftIndex, /* index to be found */
156: const PetscLogEvent *dftArray, /* sorted array of PetscLogEvent-ids */
157: int narray, /* dimension of dftArray */
158: int *entry) /* entry in the array where dftIndex may be found;
159: * if dftArray[entry] != dftIndex, then dftIndex is not part of dftArray
160: * In that case, the dftIndex can be inserted at this entry. */
161: {
163: if (narray==0 || dftIndex <= dftArray[0]) {
164: *entry = 0;
165: } else if (dftIndex > dftArray[narray-1]) {
166: *entry = narray;
167: } else {
168: int ihigh=narray-1, ilow=0;
169: while (ihigh>ilow) {
170: const int imiddle = (ihigh+ilow)/2;
171: if (dftArray[imiddle] > dftIndex) {
172: ihigh=imiddle;
173: } else if (dftArray[imiddle]<dftIndex) {
174: ilow =imiddle+1;
175: } else {
176: ihigh=imiddle;
177: ilow =imiddle;
178: }
179: }
180: *entry = ihigh;
181: }
182: return(0);
183: }
185: /* Utility: find the nested event with given identification */
188: static PetscErrorCode PetscLogEventFindNestedTimer(NestedEventId nstEvent, /* Nested event to be found */
189: int *entry) /* entry in the nestedEvents where nstEvent may be found;
190: if nestedEvents[entry].nstEvent != nstEvent, then index is not part of iarray */
191: {
194: if (nNestedEvents==0 || nstEvent <= nestedEvents[0].nstEvent) {
195: *entry = 0;
196: } else if (nstEvent > nestedEvents[nNestedEvents-1].nstEvent) {
197: *entry = nNestedEvents;
198: } else {
199: int ihigh=nNestedEvents-1, ilow=0;
200: while (ihigh>ilow) {
201: const int imiddle = (ihigh+ilow)/2;
202: if (nestedEvents[imiddle].nstEvent > nstEvent) {
203: ihigh=imiddle;
204: } else if (nestedEvents[imiddle].nstEvent<nstEvent) {
205: ilow =imiddle+1;
206: } else {
207: ihigh=imiddle;
208: ilow =imiddle;
209: }
210: }
211: *entry = ihigh;
212: }
213: return(0);
214: }
216: /******************************************************************************************/
217: /* Start a nested event */
220: static PetscErrorCode PetscLogEventBeginNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
221: {
222: PetscErrorCode ierr;
223: int entry, pentry, tentry,i;
224: PetscLogEvent dftEvent;
227: PetscLogEventFindNestedTimer(nstEvent, &entry);
228: if (entry>=nNestedEvents || nestedEvents[entry].nstEvent != nstEvent) {
229: /* Nested event doesn't exist yet: create it */
231: if (nNestedEvents==nNestedEventsAllocated) {
232: /* Enlarge and re-allocate nestedEvents if needed */
233: PetscNestedEvent *tmp = nestedEvents;
234: PetscMalloc1(2*nNestedEvents,&nestedEvents);
235: nNestedEventsAllocated*=2;
236: PetscMemcpy(nestedEvents, tmp, nNestedEvents*sizeof(PetscNestedEvent));
237: PetscFree(tmp);
238: }
240: /* Clear space in nestedEvents for new nested event */
241: nNestedEvents++;
242: for (i = nNestedEvents-1; i>entry; i--) {
243: nestedEvents[i] = nestedEvents[i-1];
244: }
246: /* Create event in nestedEvents */
247: nestedEvents[entry].nstEvent = nstEvent;
248: nestedEvents[entry].nParents=1;
249: PetscMalloc4(1,&nestedEvents[entry].dftParentsSorted,1,&nestedEvents[entry].dftEventsSorted,1,&nestedEvents[entry].dftParents,1,&nestedEvents[entry].dftEvents);
251: /* Fill in new event */
252: pentry = 0;
253: dftEvent = (PetscLogEvent) nstEvent;
255: nestedEvents[entry].nstEvent = nstEvent;
256: nestedEvents[entry].dftParents[pentry] = dftParentActive;
257: nestedEvents[entry].dftEvents[pentry] = dftEvent;
258: nestedEvents[entry].dftParentsSorted[pentry] = dftParentActive;
259: nestedEvents[entry].dftEventsSorted[pentry] = dftEvent;
261: } else {
262: /* Nested event exists: find current dftParentActive among parents */
263: PetscLogEvent *dftParentsSorted = nestedEvents[entry].dftParentsSorted;
264: PetscLogEvent *dftEvents = nestedEvents[entry].dftEvents;
265: int nParents = nestedEvents[entry].nParents;
267: PetscLogEventFindDefaultTimer( dftParentActive, dftParentsSorted, nParents, &pentry);
269: if (pentry>=nParents || dftParentActive != dftParentsSorted[pentry]) {
270: /* dftParentActive not in the list: add it to the list */
271: int i;
272: PetscLogEvent *dftParents = nestedEvents[entry].dftParents;
273: PetscLogEvent *dftEventsSorted = nestedEvents[entry].dftEventsSorted;
274: char name[100];
276: /* Register a new default timer */
277: sprintf(name, "%d -> %d", (int) dftParentActive, (int) nstEvent);
278: PetscLogEventRegister(name, 0, &dftEvent);
279: PetscLogEventFindDefaultTimer( dftEvent, dftEventsSorted, nParents, &tentry);
281: /* Reallocate parents and dftEvents to make space for new parent */
282: PetscMalloc4(1+nParents,&nestedEvents[entry].dftParentsSorted,1+nParents,&nestedEvents[entry].dftEventsSorted,1+nParents,&nestedEvents[entry].dftParents,1+nParents,&nestedEvents[entry].dftEvents);
283: PetscMemcpy(nestedEvents[entry].dftParentsSorted, dftParentsSorted, nParents*sizeof(PetscLogEvent));
284: PetscMemcpy(nestedEvents[entry].dftEventsSorted, dftEventsSorted, nParents*sizeof(PetscLogEvent));
285: PetscMemcpy(nestedEvents[entry].dftParents, dftParents, nParents*sizeof(PetscLogEvent));
286: PetscMemcpy(nestedEvents[entry].dftEvents, dftEvents, nParents*sizeof(PetscLogEvent));
287: PetscFree4(dftParentsSorted,dftEventsSorted,dftParents,dftEvents);
289: dftParents = nestedEvents[entry].dftParents;
290: dftEvents = nestedEvents[entry].dftEvents;
291: dftParentsSorted = nestedEvents[entry].dftParentsSorted;
292: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
294: nestedEvents[entry].nParents++;
295: nParents++;
297: for (i = nParents-1; i>pentry; i--) {
298: dftParentsSorted[i] = dftParentsSorted[i-1];
299: dftEvents[i] = dftEvents[i-1];
300: }
301: for (i = nParents-1; i>tentry; i--) {
302: dftParents[i] = dftParents[i-1];
303: dftEventsSorted[i] = dftEventsSorted[i-1];
304: }
306: /* Fill in the new default timer */
307: dftParentsSorted[pentry] = dftParentActive;
308: dftEvents[pentry] = dftEvent;
309: dftParents[tentry] = dftParentActive;
310: dftEventsSorted[tentry] = dftEvent;
312: } else {
313: /* dftParentActive was found: find the corresponding default 'dftEvent'-timer */
314: dftEvent = nestedEvents[entry].dftEvents[pentry];
315: }
316: }
318: /* Start the default 'dftEvent'-timer and update the dftParentActive */
319: PetscLogEventBeginDefault(dftEvent,t,o1,o2,o3,o4);
320: dftParentActive = dftEvent;
321: return(0);
322: }
324: /* End a nested event */
327: static PetscErrorCode PetscLogEventEndNested(NestedEventId nstEvent, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
328: {
329: PetscErrorCode ierr;
330: int entry, pentry, nParents;
331: PetscLogEvent *dftEventsSorted;
334: /* Find the nested event */
335: PetscLogEventFindNestedTimer(nstEvent, &entry);
336: if (entry>=nNestedEvents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d larger than number of events %d",entry,nNestedEvents);
337: if (nestedEvents[entry].nstEvent != nstEvent) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Logging event %d had unbalanced begin/end pairs does not match %d",entry,nstEvent);
338: dftEventsSorted = nestedEvents[entry].dftEventsSorted;
339: nParents = nestedEvents[entry].nParents;
341: /* Find the current default timer among the 'dftEvents' of this event */
342: PetscLogEventFindDefaultTimer( dftParentActive, dftEventsSorted, nParents, &pentry);
344: if (pentry>=nParents) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Entry %d is larger than number of parents %d",pentry,nParents);
345: if (dftEventsSorted[pentry] != dftParentActive) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Active parent is %d, but we seem to be closing %d",dftParentActive,dftEventsSorted[pentry]);
347: /* Stop the default timer and update the dftParentActive */
348: PetscLogEventEndDefault(dftParentActive,t,o1,o2,o3,o4);
349: dftParentActive = nestedEvents[entry].dftParents[pentry];
350: return(0);
351: }
353: /* Set the threshold time for logging the events
354: */
357: PetscErrorCode PetscLogSetThreshold(PetscLogDouble newThresh, PetscLogDouble *oldThresh)
358: {
360: *oldThresh = threshTime;
361: threshTime = newThresh;
362: return(0);
363: }
367: static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
368: {
369: PetscErrorCode ierr;
370: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
371: char version[256], buildoptions[128];
372: PetscMPIInt size;
373: MPI_Comm comm;
374: size_t len;
375:
377: PetscObjectGetComm((PetscObject)viewer,&comm);
378: PetscGetArchType(arch,sizeof(arch));
379: PetscGetHostName(hostname,sizeof(hostname));
380: PetscGetUserName(username,sizeof(username));
381: PetscGetProgramName(pname,sizeof(pname));
382: PetscGetDate(date,sizeof(date));
383: PetscGetVersion(version,sizeof(version));
384: MPI_Comm_size(comm, &size);
386: PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification");
387: PetscViewerXMLPutString( viewer, "executable" , "Executable" , pname );
388: PetscViewerXMLPutString( viewer, "architecture", "Architecture" , arch );
389: PetscViewerXMLPutString( viewer, "hostname" , "Host" , hostname);
390: PetscViewerXMLPutInt( viewer, "nprocesses" , "Number of processes", size );
391: PetscViewerXMLPutString( viewer, "user" , "Run by user" , username);
392: PetscViewerXMLPutString( viewer, "date" , "Started at" , date);
393: PetscViewerXMLPutString( viewer, "petscrelease", "Petsc Release", version);
394: #if defined(PETSC_USE_DEBUG)
395: # if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
396: sprintf(buildoptions, "Debug, ComplexC++Kernels");
397: # else
398: sprintf(buildoptions, "Debug");
399: # endif
400: #else
401: # if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
402: sprintf(buildoptions, "ComplexC++Kernels");
403: # endif
404: #endif
405: PetscStrlen(buildoptions,&len);
406: if (len) {
407: PetscViewerXMLPutString(viewer, "petscbuildoptions", "Petsc build options", buildoptions);
408: }
409: PetscViewerXMLEndSection(viewer, "runspecification");
410: return(0);
411: }
413: /* Print the global performance: max, max/min, average and total of
414: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
415: */
418: static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble max, PetscLogDouble ratio, PetscLogDouble avg, PetscLogDouble tot)
419: {
423: PetscViewerXMLStartSection(viewer, name, desc);
424: PetscViewerXMLPutDouble(viewer, "max", NULL, max, "%e");
425: PetscViewerXMLPutDouble(viewer, "ratio", NULL, ratio, "%f");
426: if (avg>-1.0) {
427: PetscViewerXMLPutDouble(viewer, "average", NULL, avg, "%e");
428: }
429: if (tot>-1.0) {
430: PetscViewerXMLPutDouble(viewer, "total", NULL, tot, "%e");
431: }
432: PetscViewerXMLEndSection(viewer, name);
433: return(0);
434: }
436: /* Print the global performance: max, max/min, average and total of
437: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
438: */
441: static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime)
442: {
443: PetscErrorCode ierr;
444: PetscLogDouble min, max, tot, ratio, avg;
445: PetscLogDouble flops, mem, red, mess;
446: PetscMPIInt size;
447: MPI_Comm comm;
450: PetscObjectGetComm((PetscObject)viewer,&comm);
451: MPI_Comm_size(comm, &size);
453: /* Must preserve reduction count before we go on */
454: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
456: /* Calculate summary information */
457: PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance");
459: /* Time */
460: MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
461: MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
462: MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
463: avg = (tot)/((PetscLogDouble) size);
464: if (min != 0.0) ratio = max/min;
465: else ratio = 0.0;
466: PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", max, ratio, avg, -1.0);
468: /* Objects */
469: avg = (PetscLogDouble) petsc_numObjects;
470: MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
471: MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
472: MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
473: avg = (tot)/((PetscLogDouble) size);
474: if (min != 0.0) ratio = max/min;
475: else ratio = 0.0;
476: PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", max, ratio, avg, -1.0);
478: /* Flop */
479: MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
480: MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
481: MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
482: avg = (tot)/((PetscLogDouble) size);
483: if (min != 0.0) ratio = max/min;
484: else ratio = 0.0;
485: PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
487: /* Flop/sec -- Must talk to Barry here */
488: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime;
489: else flops = 0.0;
490: MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
491: MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
492: MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
493: avg = (tot)/((PetscLogDouble) size);
494: if (min != 0.0) ratio = max/min;
495: else ratio = 0.0;
496: PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", max/1.0E6, ratio, avg/1.0E6, tot/1.0E6);
498: /* Memory */
499: PetscMallocGetMaximumUsage(&mem);
500: if (mem > 0.0) {
501: MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
502: MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
503: MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
504: avg = (tot)/((PetscLogDouble) size);
505: if (min != 0.0) ratio = max/min;
506: else ratio = 0.0;
507: PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
508: }
509: /* Messages */
510: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
511: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
512: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
513: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
514: avg = (tot)/((PetscLogDouble) size);
515: if (min != 0.0) ratio = max/min;
516: else ratio = 0.0;
517: PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", max, ratio, avg, tot);
519: /* Message Volume */
520: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
521: MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
522: MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
523: MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
524: avg = (tot)/((PetscLogDouble) size);
525: if (min != 0.0) ratio = max/min;
526: else ratio = 0.0;
527: PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", max/1024.0/1024.0, ratio, avg/1024.0/1024.0, tot/1024.0/1024.0);
529: /* Reductions */
530: MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
531: MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
532: MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
533: if (min != 0.0) ratio = max/min;
534: else ratio = 0.0;
535: PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", max, ratio, -1, -1);
536: PetscViewerXMLEndSection(viewer, "globalperformance");
537: return(0);
538: }
540: typedef struct {
541: PetscLogEvent dftEvent;
542: NestedEventId nstEvent;
543: PetscLogEvent dftParent;
544: NestedEventId nstParent;
545: PetscBool own;
546: int depth;
547: NestedEventId* nstPath;
548: } PetscNestedEventTree;
550: /* Compare timers to sort them in the tree */
551: static int compareTreeItems(const void *item1_, const void *item2_)
552: {
553: int i;
554: PetscNestedEventTree *item1 = (PetscNestedEventTree *) item1_;
555: PetscNestedEventTree *item2 = (PetscNestedEventTree *) item2_;
556: for (i=0; i<PetscMin(item1->depth,item2->depth); i++) {
557: if (item1->nstPath[i]<item2->nstPath[i]) return -1;
558: if (item1->nstPath[i]>item2->nstPath[i]) return +1;
559: }
560: if (item1->depth < item2->depth) return -1;
561: if (item1->depth > item2->depth) return 1;
562: return 0;
563: }
564: /*
565: * Do MPI communication to get the complete, nested calling tree for all processes: there may be
566: * calls that happen in some processes, but not in others.
567: *
568: * The output, tree[nTimers] is an array of PetscNestedEventTree-structs.
569: * The tree is sorted so that the timers can be printed in the order of appearance.
570: *
571: * For tree-items which appear in the trees of multiple processes (which will be most items), the
572: * following rule is followed:
573: * + if information from my own process is available, then that is the information stored in tree.
574: * otherwise it is some other process's information.
575: */
578: static PetscErrorCode PetscCreateLogTreeNested(PetscViewer viewer, PetscNestedEventTree **p_tree, int *p_nTimers)
579: {
580: PetscNestedEventTree *tree = NULL, *newTree;
581: int *treeIndices;
582: int nTimers, totalNTimers, i, j, iTimer0, maxDefaultTimer;
583: int yesno;
584: PetscBool done;
585: PetscErrorCode ierr;
586: int maxdepth;
587: int depth;
588: int illegalEvent;
589: int iextra;
590: PetscStageLog stageLog;
591: NestedEventId *nstPath, *nstMyPath;
592: MPI_Comm comm;
595: PetscObjectGetComm((PetscObject)viewer,&comm);
596: PetscLogGetStageLog(&stageLog);
598: /* Calculate memory needed to store everybody's information and allocate tree */
599: nTimers = 0;
600: for (i=0; i<nNestedEvents; i++) nTimers+=nestedEvents[i].nParents;
602: PetscMalloc1(nTimers,&tree);
604: /* Fill tree with readily available information */
605: iTimer0 = 0;
606: maxDefaultTimer =0;
607: for (i=0; i<nNestedEvents; i++) {
608: int nParents = nestedEvents[i].nParents;
609: NestedEventId nstEvent = nestedEvents[i].nstEvent;
610: PetscLogEvent *dftParentsSorted = nestedEvents[i].dftParentsSorted;
611: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
612: for (j=0; j<nParents; j++) {
613: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
615: tree[iTimer0+j].dftEvent = dftEvents[j];
616: tree[iTimer0+j].nstEvent = nstEvent;
617: tree[iTimer0+j].dftParent = dftParentsSorted[j];
618: tree[iTimer0+j].own = PETSC_TRUE;
620: tree[iTimer0+j].nstParent = 0;
621: tree[iTimer0+j].depth = 0;
622: tree[iTimer0+j].nstPath = NULL;
623: }
624: iTimer0 += nParents;
625: }
627: /* Calculate the global maximum for the default timer index, so array treeIndices can
628: * be allocated only once */
629: MPIU_Allreduce(&maxDefaultTimer, &j, 1, MPI_INT, MPI_MAX, comm);
630: maxDefaultTimer = j;
632: /* Find default timer's place in the tree */
633: PetscCalloc1(maxDefaultTimer+1,&treeIndices);
634: treeIndices[0] = 0;
635: for (i=1; i<nTimers; i++) {
636: PetscLogEvent dftEvent = tree[i].dftEvent;
637: treeIndices[dftEvent] = i;
638: }
640: /* Find each dftParent's nested identification */
641: for (i=0; i<nTimers; i++) {
642: PetscLogEvent dftParent = tree[i].dftParent;
643: if (dftParent) {
644: int j = treeIndices[dftParent];
645: tree[i].nstParent = tree[j].nstEvent;
646: }
647: }
649: /* Find depths for each timer path */
650: done = PETSC_FALSE;
651: maxdepth = 0;
652: tree[0].depth=1;
653: while (!done) {
654: done = PETSC_TRUE;
655: for (i=1; i<nTimers; i++) {
656: int j = treeIndices[tree[i].dftParent];
657: if (j==0) {
658: tree[i].depth=1;
659: } else if (tree[i].dftEvent!=0) {
660: depth = 1+tree[j].depth;
661: if (depth>tree[i].depth) {
662: done = PETSC_FALSE;
663: tree[i].depth = depth;
664: maxdepth = PetscMax(depth,maxdepth);
665: }
666: }
667: }
668: }
670: /* Allocate the paths in the entire tree */
671: for (i=0; i<nTimers; i++) {
672: depth = tree[i].depth;
673: PetscCalloc1(depth,&tree[i].nstPath);
674: }
676: /* Calculate the paths for all timers */
677: for (depth=1; depth<=maxdepth; depth++) {
678: for (i=0; i<nTimers; i++) {
679: if (tree[i].depth==depth) {
680: if (depth>1) {
681: int j = treeIndices[tree[i].dftParent];
682: PetscMemcpy(tree[i].nstPath,tree[j].nstPath,(depth-1)*sizeof(NestedEventId));
683: }
684: tree[i].nstPath[depth-1] = tree[i].nstEvent;
685: }
686: }
687: }
688: PetscFree(treeIndices);
690: /* Sort the tree on basis of the paths */
691: qsort(tree, nTimers, sizeof(PetscNestedEventTree), compareTreeItems);
693: /* Allocate an array to store paths */
694: depth = maxdepth;
695: MPIU_Allreduce(&depth, &maxdepth, 1, MPI_INT, MPI_MAX, comm);
696: PetscMalloc1(maxdepth+2, &nstPath);
697: PetscMalloc1(maxdepth+2, &nstMyPath);
699: /* Find an illegal nested event index (1+largest nested event index) */
700: illegalEvent = 1+nestedEvents[nNestedEvents-1].nstEvent;
701: i = illegalEvent;
702: MPIU_Allreduce(&i, &illegalEvent, 1, MPI_INT, MPI_MAX, comm);
704: /* First, detect timers which are not available in this process, but are available in others
705: * Allocate a new tree, that can contain all timers
706: * Then, fill the new tree with all (own and not-own) timers */
707: newTree= NULL;
708: for (yesno=0; yesno<=1; yesno++) {
709: depth = 1;
710: i = 0;
711: iextra = 0;
712: while (depth>0) {
713: int j;
714: PetscBool same;
716: /* Construct the next path in this process's tree:
717: * if necessary, supplement with invalid path entries */
718: depth++;
719: if (depth > maxdepth+1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Depth %d > maxdepth + 1 %d",depth,maxdepth+1);
720: if (i<nTimers) {
721: for (j=0; j<tree[i].depth; j++) nstMyPath[j] = tree[i].nstPath[j];
722: for (j=tree[i].depth; j<=depth; j++) nstMyPath[j] = illegalEvent;
723: } else {
724: for (j=0; j<=depth; j++) nstMyPath[j] = illegalEvent;
725: }
726:
727: /* Communicate with other processes to obtain the next path and its depth */
728: MPIU_Allreduce(nstMyPath, nstPath, depth+1, MPI_INT, MPI_MIN, comm);
729: for (j=depth-1; (int) j>=0; j--) {
730: if (nstPath[j]==illegalEvent) depth=j;
731: }
732:
733: if (depth>0) {
734: /* If the path exists */
735:
736: /* check whether the next path is the same as this process's next path */
737: same = PETSC_TRUE;
738: for (j=0; same && j<=depth; j++) { same = (same && nstMyPath[j] == nstPath[j]) ? PETSC_TRUE : PETSC_FALSE;}
739:
740: if (same) {
741: /* Register 'own path' */
742: if (newTree) newTree[i+iextra] = tree[i];
743: i++;
744: } else {
745: /* Register 'not an own path' */
746: if (newTree) {
747: newTree[i+iextra].nstEvent = nstPath[depth-1];
748: newTree[i+iextra].own = PETSC_FALSE;
749: newTree[i+iextra].depth = depth;
750: PetscMalloc1(depth, &newTree[i+iextra].nstPath);
751: for (j=0; j<depth; j++) {newTree[i+iextra].nstPath[j] = nstPath[j];}
752:
753: newTree[i+iextra].dftEvent = 0;
754: newTree[i+iextra].dftParent = 0;
755: newTree[i+iextra].nstParent = 0;
756: }
757: iextra++;
758: }
759:
760: }
761: }
763: /* Determine the size of the complete tree (with own and not-own timers) and allocate the new tree */
764: totalNTimers = nTimers + iextra;
765: if (!newTree) {
766: PetscMalloc1(totalNTimers, &newTree);
767: }
768: }
769: PetscFree(nstPath);
770: PetscFree(nstMyPath);
771: PetscFree(tree);
772: tree = newTree;
773: newTree = NULL;
775: /* Set return value and return */
776: *p_tree = tree;
777: *p_nTimers = totalNTimers;
778: return(0);
779: }
781: /*
782: * Delete the nested timer tree
783: */
786: static PetscErrorCode PetscLogFreeNestedTree(PetscNestedEventTree *tree, int nTimers)
787: {
788: int i;
789: PetscErrorCode ierr;
790:
792: for (i=0; i<nTimers; i++) {
793: PetscFree(tree[i].nstPath);
794: }
795: PetscFree(tree);
796: return(0);
797: }
799: /* Print the global performance: max, max/min, average and total of
800: * time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
801: */
804: static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble minvalue, PetscLogDouble maxvalue, PetscLogDouble minmaxtreshold)
805: {
809: PetscViewerXMLStartSection(viewer, name, NULL);
810: if (maxvalue>minvalue*minmaxtreshold) {
811: PetscViewerXMLPutDouble(viewer, "minvalue", NULL, minvalue, "%f");
812: PetscViewerXMLPutDouble(viewer, "maxvalue", NULL, maxvalue, "%f");
813: } else {
814: PetscViewerXMLPutDouble(viewer, "value", NULL, (minvalue+maxvalue)/2.0, "%g");
815: };
816: PetscViewerXMLEndSection(viewer, name);
817: return(0);
818: }
820: #define N_COMM 8
823: static PetscErrorCode PetscLogPrintNestedLine(PetscViewer viewer,PetscEventPerfInfo perfInfo,PetscLogDouble countsPerCall,int parentCount,int depth,const char *name,PetscLogDouble totalTime,PetscBool *isPrinted)
824: {
825: PetscLogDouble time = perfInfo.time;
826: PetscLogDouble timeMx, timeMn;
827: PetscLogDouble countsPerCallMx, countsPerCallMn;
828: PetscLogDouble reductSpeedMx, reductSpeedMn;
829: PetscLogDouble flopSpeedMx, flopSpeedMn;
830: PetscLogDouble msgSpeedMx, msgSpeedMn;
831: PetscLogDouble commarr_in[N_COMM], commarr_out[N_COMM];
833: MPI_Comm comm;
836: PetscObjectGetComm((PetscObject)viewer,&comm);
838: commarr_in[0] = time;
839: commarr_in[1] = -time;
840: MPIU_Allreduce(commarr_in, commarr_out, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
841: timeMx = commarr_out[0];
842: timeMn = -commarr_out[1];
844: commarr_in[0] = time>0.0 ? perfInfo.flops/time : 0;
845: commarr_in[1] = time>0.0 ? perfInfo.numReductions/time : 0;
846: commarr_in[2] = time>0.0 ? perfInfo.messageLength/time : 0;
847: commarr_in[3] = parentCount>0 ? countsPerCall : 0;
849: commarr_in[4] = time>0.0 ? -perfInfo.flops/time : -1e30;
850: commarr_in[5] = time>0.0 ? -perfInfo.numReductions/time : -1e30;
851: commarr_in[6] = time>0.0 ? -perfInfo.messageLength/time : -1e30;
852: commarr_in[7] = parentCount>0 ? -countsPerCall : -1e30;
854: MPIU_Allreduce(commarr_in, commarr_out, N_COMM, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
856: flopSpeedMx = commarr_out[0];
857: reductSpeedMx = commarr_out[1];
858: msgSpeedMx = commarr_out[2];
859: countsPerCallMx = commarr_out[3];
861: flopSpeedMn = -commarr_out[4];
862: reductSpeedMn = -commarr_out[5];
863: msgSpeedMn = -commarr_out[6];
864: countsPerCallMn = -commarr_out[7];
866: *isPrinted = ((timeMx/totalTime) > (threshTime/100.0)) ? PETSC_TRUE : PETSC_FALSE;
867: if (isPrinted) {
868: PetscViewerXMLStartSection(viewer, "event", NULL);
869: PetscViewerXMLPutString(viewer, "name", NULL, name);
870: PetscPrintXMLNestedLinePerfResults(viewer, "time", timeMn/totalTime*100.0, timeMx/totalTime*100.0, 1.02);
873: if (countsPerCallMx<1.01 && countsPerCallMn>0.99) {
874: /* One call per parent */
875: } else {
876: PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", countsPerCallMn, countsPerCallMx, 1.02);
877: }
878:
879: if (flopSpeedMx<0.01) {
880: /* NO flops: don't print */
881: } else {
882: PetscPrintXMLNestedLinePerfResults(viewer, "mflops", flopSpeedMn/1e6, flopSpeedMx/1e6, 1.05);
883: }
884:
885: if (msgSpeedMx<0.01) {
886: /* NO msgs: don't print */
887: } else {
888: PetscPrintXMLNestedLinePerfResults(viewer, "mbps", msgSpeedMn/1024.0/1024.0, msgSpeedMx/1024.0/1024.0, 1.05);
889: }
890:
891: if (reductSpeedMx<0.01) {
892: /* NO reductions: don't print */
893: } else {
894: PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", reductSpeedMn, reductSpeedMx, 1.05);
895: }
896: }
897: return(0);
898: }
900: /* Count the number of times the parent event was called */
902: static int countParents( const PetscNestedEventTree *tree, PetscEventPerfInfo *eventPerfInfo, int i)
903: {
904: if (tree[i].depth<=1) {
905: return 1; /* Main event: only once */
906: } else if (!tree[i].own) {
907: return 1; /* This event didn't happen in this process, but did in another */
908: } else {
909: int iParent;
910: for (iParent=i-1; iParent>=0; iParent--) {
911: if (tree[iParent].depth == tree[i].depth-1) break;
912: }
913: if (tree[iParent].depth != tree[i].depth-1) {
914: printf("\n\n ***** Internal error: cannot find parent ****\n\n");
915: return -2;
916: } else {
917: PetscLogEvent dftEvent = tree[iParent].dftEvent;
918: return eventPerfInfo[dftEvent].count;
919: }
920: }
921: }
923: typedef struct {
924: int id;
925: PetscLogDouble val;
926: } PetscSortItem;
928: static int compareSortItems(const void *item1_, const void *item2_)
929: {
930: PetscSortItem *item1 = (PetscSortItem *) item1_;
931: PetscSortItem *item2 = (PetscSortItem *) item2_;
932: if (item1->val > item2->val) return -1;
933: if (item1->val < item2->val) return +1;
934: return 0;
935: }
937: static PetscErrorCode PetscLogNestedPrint(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, int iStart, PetscLogDouble totalTime)
938: {
939: int depth = tree[iStart].depth;
940: const char *name;
941: int parentCount, nChildren;
942: PetscSortItem *children;
943: PetscErrorCode ierr;
944: PetscEventPerfInfo *eventPerfInfo;
945: PetscEventPerfInfo myPerfInfo, otherPerfInfo, selfPerfInfo;
946: PetscLogDouble countsPerCall;
947: PetscBool wasPrinted;
948: PetscBool childWasPrinted;
949: MPI_Comm comm;
951: {
952: /* Look up the name of the event and its PerfInfo */
953: const int stage=0;
954: PetscStageLog stageLog;
955: PetscEventRegInfo *eventRegInfo;
956: PetscLogGetStageLog(&stageLog);
957: eventRegInfo = stageLog->eventLog->eventInfo;
958: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
959: name = eventRegInfo[(PetscLogEvent) tree[iStart].nstEvent].name;
960: }
962: PetscObjectGetComm((PetscObject)viewer,&comm);
964: /* Count the number of child processes */
965: nChildren = 0;
966: {
967: int i;
968: for (i=iStart+1; i<nTimers; i++) {
969: if (tree[i].depth<=depth) break;
970: if (tree[i].depth == depth + 1) nChildren++;
971: }
972: }
974: if (nChildren>0) {
975: /* Create an array for the id-s and maxTimes of the children,
976: * leaving 2 spaces for self-time and other-time */
977: int i;
978: PetscLogDouble *times, *maxTimes;
980: PetscMalloc1(nChildren+2,&children);
981: nChildren = 0;
982: for (i=iStart+1; i<nTimers; i++) {
983: if (tree[i].depth<=depth) break;
984: if (tree[i].depth == depth + 1) {
985: children[nChildren].id = i;
986: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
987: nChildren++;
988: }
989: }
991: /* Calculate the children's maximum times, to see whether children will be ignored or printed */
992: PetscMalloc1(nChildren,×);
993: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
995: PetscMalloc1(nChildren,&maxTimes);
996: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
997: PetscFree(times);
999: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1000: PetscFree(maxTimes);
1001: }
1003: if (!tree[iStart].own) {
1004: /* Set values for a timer that was not activated in this process
1005: * (but was, in other processes of this run) */
1006: PetscMemzero(&myPerfInfo,sizeof(myPerfInfo));
1008: selfPerfInfo = myPerfInfo;
1009: otherPerfInfo = myPerfInfo;
1011: parentCount = 1;
1012: countsPerCall = 0;
1013: } else {
1014: /* Set the values for a timer that was activated in this process */
1015: int i;
1016: PetscLogEvent dftEvent = tree[iStart].dftEvent;
1018: parentCount = countParents( tree, eventPerfInfo, iStart);
1019: myPerfInfo = eventPerfInfo[dftEvent];
1020: countsPerCall = (PetscLogDouble) myPerfInfo.count / (PetscLogDouble) parentCount;
1022: selfPerfInfo = myPerfInfo;
1023: otherPerfInfo.time = 0;
1024: otherPerfInfo.flops = 0;
1025: otherPerfInfo.numMessages = 0;
1026: otherPerfInfo.messageLength = 0;
1027: otherPerfInfo.numReductions = 0;
1029: for (i=0; i<nChildren; i++) {
1030: /* For all child counters: subtract the child values from self-timers */
1032: PetscLogEvent dftChild = tree[children[i].id].dftEvent;
1033: PetscEventPerfInfo childPerfInfo = eventPerfInfo[dftChild];
1035: selfPerfInfo.time -= childPerfInfo.time;
1036: selfPerfInfo.flops -= childPerfInfo.flops;
1037: selfPerfInfo.numMessages -= childPerfInfo.numMessages;
1038: selfPerfInfo.messageLength -= childPerfInfo.messageLength;
1039: selfPerfInfo.numReductions -= childPerfInfo.numReductions;
1041: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1042: /* Add them to 'other' if the time is ignored in the output */
1043: otherPerfInfo.time += childPerfInfo.time;
1044: otherPerfInfo.flops += childPerfInfo.flops;
1045: otherPerfInfo.numMessages += childPerfInfo.numMessages;
1046: otherPerfInfo.messageLength += childPerfInfo.messageLength;
1047: otherPerfInfo.numReductions += childPerfInfo.numReductions;
1048: }
1049: }
1050: }
1052: /* Main output for this timer */
1053: PetscLogPrintNestedLine(viewer, myPerfInfo, countsPerCall, parentCount, depth, name, totalTime, &wasPrinted);
1055: /* Now print the lines for the children */
1056: if (nChildren>0) {
1057: /* Calculate max-times for 'self' and 'other' */
1058: int i;
1059: PetscLogDouble times[2], maxTimes[2];
1060: times[0] = selfPerfInfo.time; times[1] = otherPerfInfo.time;
1061: MPIU_Allreduce(times, maxTimes, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1062: children[nChildren+0].id = -1;
1063: children[nChildren+0].val = maxTimes[0];
1064: children[nChildren+1].id = -2;
1065: children[nChildren+1].val = maxTimes[1];
1067: /* Now sort the children (including 'self' and 'other') on total time */
1068: qsort(children, nChildren+2, sizeof(PetscSortItem), compareSortItems);
1070: /* Print (or ignore) the children in ascending order of total time */
1071: PetscViewerXMLStartSection(viewer,"events", NULL);
1072: for (i=0; i<nChildren+2; i++) {
1073: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1074: /* ignored: no output */
1075: } else if (children[i].id==-1) {
1076: PetscLogPrintNestedLine(viewer, selfPerfInfo, 1, parentCount, depth+1, "self", totalTime, &childWasPrinted);
1077: if (childWasPrinted) {
1078: PetscViewerXMLEndSection(viewer,"event");
1079: }
1080: } else if (children[i].id==-2) {
1081: size_t len;
1082: char *otherName;
1084: PetscStrlen(name,&len);
1085: PetscMalloc1(16+len,&otherName);
1086: sprintf(otherName,"%s: other-timed",name);
1087: PetscLogPrintNestedLine(viewer, otherPerfInfo, 1, 1, depth+1, otherName, totalTime, &childWasPrinted);
1088: PetscFree(otherName);
1089: if (childWasPrinted) {
1090: PetscViewerXMLEndSection(viewer,"event");
1091: }
1092: } else {
1093: /* Print the child with a recursive call to this function */
1094: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1095: }
1096: }
1097: PetscViewerXMLEndSection(viewer,"events");
1098: PetscFree(children);
1099: }
1101: if (wasPrinted) {
1102: PetscViewerXMLEndSection(viewer, "event");
1103: }
1104: return 0;
1105: }
1109: static PetscErrorCode PetscLogNestedPrintTop(PetscViewer viewer, PetscNestedEventTree *tree,int nTimers, PetscLogDouble totalTime)
1110: {
1111: int nChildren;
1112: PetscSortItem *children;
1113: PetscErrorCode ierr;
1114: PetscEventPerfInfo *eventPerfInfo;
1115: MPI_Comm comm;
1118: PetscObjectGetComm((PetscObject)viewer,&comm);
1119: {
1120: /* Look up the PerfInfo */
1121: const int stage=0;
1122: PetscStageLog stageLog;
1123: PetscLogGetStageLog(&stageLog);
1124: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1125: }
1127: /* Count the number of child processes, and count total time */
1128: nChildren = 0;
1129: {
1130: int i;
1131: for (i=0; i<nTimers; i++) {
1132: if (tree[i].depth==1) nChildren++;
1133: }
1134: }
1136: if (nChildren>0) {
1137: /* Create an array for the id-s and maxTimes of the children,
1138: * leaving 2 spaces for self-time and other-time */
1139: int i;
1140: PetscLogDouble *times, *maxTimes;
1142: PetscMalloc1(nChildren,&children);
1143: nChildren = 0;
1144: for (i=0; i<nTimers; i++) {
1145: if (tree[i].depth == 1) {
1146: children[nChildren].id = i;
1147: children[nChildren].val = eventPerfInfo[tree[i].dftEvent].time ;
1148: nChildren++;
1149: }
1150: }
1151:
1152: /* Calculate the children's maximum times, to sort them */
1153: PetscMalloc1(nChildren,×);
1154: for (i=0; i<nChildren; i++) { times[i] = children[i].val; }
1156: PetscMalloc1(nChildren,&maxTimes);
1157: MPIU_Allreduce(times, maxTimes, nChildren, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1158: PetscFree(times);
1160: for (i=0; i<nChildren; i++) { children[i].val = maxTimes[i]; }
1161: PetscFree(maxTimes);
1163: /* Now sort the children on total time */
1164: qsort(children, nChildren, sizeof(PetscSortItem), compareSortItems);
1165: /* Print (or ignore) the children in ascending order of total time */
1166: PetscViewerXMLStartSection(viewer, "timertree", "Timings tree");
1167: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1168: PetscViewerXMLPutDouble(viewer, "timethreshold", NULL, threshTime, "%f");
1170: for (i=0; i<nChildren; i++) {
1171: if ((children[i].val/totalTime) < (threshTime/100.0)) {
1172: /* ignored: no output */
1173: } else {
1174: /* Print the child with a recursive call to this function */
1175: PetscLogNestedPrint(viewer, tree, nTimers, children[i].id, totalTime);
1176: }
1177: }
1178: PetscViewerXMLEndSection(viewer, "timertree");
1179: PetscFree(children);
1180: }
1181: return(0);
1182: }
1184: typedef struct {
1185: char *name;
1186: PetscLogDouble time;
1187: PetscLogDouble flops;
1188: PetscLogDouble numMessages;
1189: PetscLogDouble messageLength;
1190: PetscLogDouble numReductions;
1191: } PetscSelfTimer;
1195: static PetscErrorCode PetscCalcSelfTime(PetscViewer viewer, PetscSelfTimer **p_self, int *p_nstMax)
1196: {
1197: PetscErrorCode ierr;
1198: PetscEventPerfInfo *eventPerfInfo;
1199: PetscEventRegInfo *eventRegInfo;
1200: PetscSelfTimer *selftimes;
1201: PetscSelfTimer *totaltimes;
1202: NestedEventId *nstEvents;
1203: int i, maxDefaultTimer;
1204: NestedEventId nst;
1205: PetscLogEvent dft;
1206: int nstMax, nstMax_local;
1207: MPI_Comm comm;
1208:
1210: PetscObjectGetComm((PetscObject)viewer,&comm);
1211: {
1212: const int stage=0;
1213: PetscStageLog stageLog;
1214: PetscLogGetStageLog(&stageLog);
1215: eventRegInfo = stageLog->eventLog->eventInfo;
1216: eventPerfInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1217: }
1219: /* For each default timer, calculate the (one) nested timer that it corresponds to. */
1220: maxDefaultTimer =0;
1221: for (i=0; i<nNestedEvents; i++) {
1222: int nParents = nestedEvents[i].nParents;
1223: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1224: int j;
1225: for (j=0; j<nParents; j++) {
1226: maxDefaultTimer = PetscMax(dftEvents[j],maxDefaultTimer);
1227: }
1228: }
1229: PetscMalloc1(maxDefaultTimer+1,&nstEvents);
1230: for (dft=0; dft<maxDefaultTimer; dft++) {nstEvents[dft] = 0;}
1231: for (i=0; i<nNestedEvents; i++) {
1232: int nParents = nestedEvents[i].nParents;
1233: NestedEventId nstEvent = nestedEvents[i].nstEvent;
1234: PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1235: int j;
1236: for (j=0; j<nParents; j++) { nstEvents[dftEvents[j]] = nstEvent; }
1237: }
1239: /* Calculate largest nested event-ID */
1240: nstMax_local = 0;
1241: for (i=0; i<nNestedEvents; i++) { if (nestedEvents[i].nstEvent>nstMax_local) {nstMax_local = nestedEvents[i].nstEvent;} }
1242: MPIU_Allreduce(&nstMax_local, &nstMax, 1, MPI_INT, MPI_MAX, comm);
1245: /* Initialize all total-times with zero */
1246: PetscMalloc1(nstMax+1,&selftimes);
1247: PetscMalloc1(nstMax+1,&totaltimes);
1248: for (nst=0; nst<=nstMax; nst++) {
1249: totaltimes[nst].time = 0;
1250: totaltimes[nst].flops = 0;
1251: totaltimes[nst].numMessages = 0;
1252: totaltimes[nst].messageLength = 0;
1253: totaltimes[nst].numReductions = 0;
1254: totaltimes[nst].name = NULL;
1255: }
1257: /* Calculate total-times */
1258: for (i=0; i<nNestedEvents; i++) {
1259: const int nParents = nestedEvents[i].nParents;
1260: const NestedEventId nstEvent = nestedEvents[i].nstEvent;
1261: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1262: int j;
1263: for (j=0; j<nParents; j++) {
1264: const PetscLogEvent dftEvent = dftEvents[j];
1265: totaltimes[nstEvent].time += eventPerfInfo[dftEvent].time;
1266: totaltimes[nstEvent].flops += eventPerfInfo[dftEvent].flops;
1267: totaltimes[nstEvent].numMessages += eventPerfInfo[dftEvent].numMessages;
1268: totaltimes[nstEvent].messageLength += eventPerfInfo[dftEvent].messageLength;
1269: totaltimes[nstEvent].numReductions += eventPerfInfo[dftEvent].numReductions;
1270: }
1271: totaltimes[nstEvent].name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1272: }
1274: /* Initialize: self-times := totaltimes */
1275: for (nst=0; nst<=nstMax; nst++) { selftimes[nst] = totaltimes[nst]; }
1277: /* Subtract timed supprocesses from self-times */
1278: for (i=0; i<nNestedEvents; i++) {
1279: const int nParents = nestedEvents[i].nParents;
1280: const PetscLogEvent *dftEvents = nestedEvents[i].dftEvents;
1281: const NestedEventId *dftParentsSorted = nestedEvents[i].dftParentsSorted;
1282: int j;
1283: for (j=0; j<nParents; j++) {
1284: const PetscLogEvent dftEvent = dftEvents[j];
1285: const NestedEventId nstParent = nstEvents[dftParentsSorted[j]];
1286: selftimes[nstParent].time -= eventPerfInfo[dftEvent].time;
1287: selftimes[nstParent].flops -= eventPerfInfo[dftEvent].flops;
1288: selftimes[nstParent].numMessages -= eventPerfInfo[dftEvent].numMessages;
1289: selftimes[nstParent].messageLength -= eventPerfInfo[dftEvent].messageLength;
1290: selftimes[nstParent].numReductions -= eventPerfInfo[dftEvent].numReductions;
1291: }
1292: }
1294: PetscFree(nstEvents);
1295: PetscFree(totaltimes);
1297: /* Set outputs */
1298: *p_self = selftimes;
1299: *p_nstMax = nstMax;
1300: return(0);
1301: }
1305: static PetscErrorCode PetscPrintSelfTime(PetscViewer viewer, const PetscSelfTimer *selftimes, int nstMax, PetscLogDouble totalTime)
1306: {
1307: PetscErrorCode ierr;
1308: int i;
1309: NestedEventId nst;
1310: PetscSortItem *sortSelfTimes;
1311: PetscLogDouble *times, *maxTimes;
1312: PetscEventRegInfo *eventRegInfo;
1313: const int dum_depth = 1, dum_count=1, dum_parentcount=1;
1314: PetscBool wasPrinted;
1315: MPI_Comm comm;
1318: PetscObjectGetComm((PetscObject)viewer,&comm);
1319: {
1320: PetscStageLog stageLog;
1321: PetscLogGetStageLog(&stageLog);
1322: eventRegInfo = stageLog->eventLog->eventInfo;
1323: }
1325: PetscMalloc1(nstMax+1,×);
1326: PetscMalloc1(nstMax+1,&maxTimes);
1327: for (nst=0; nst<=nstMax; nst++) { times[nst] = selftimes[nst].time;}
1328: MPIU_Allreduce(times, maxTimes, nstMax+1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1329: PetscFree(times);
1331: PetscMalloc1(nstMax+1,&sortSelfTimes);
1333: /* Sort the self-timers on basis of the largest time needed */
1334: for (nst=0; nst<=nstMax; nst++) {
1335: sortSelfTimes[nst].id = nst;
1336: sortSelfTimes[nst].val = maxTimes[nst];
1337: }
1338: PetscFree(maxTimes);
1339: qsort(sortSelfTimes, nstMax+1, sizeof(PetscSortItem), compareSortItems);
1341: PetscViewerXMLStartSection(viewer, "selftimertable", "Self-timings");
1342: PetscViewerXMLPutDouble(viewer, "totaltime", NULL, totalTime, "%f");
1344: for (i=0; i<=nstMax; i++) {
1345: if ((sortSelfTimes[i].val/totalTime) >= (threshTime/100.0)) {
1346: NestedEventId nstEvent = sortSelfTimes[i].id;
1347: PetscEventPerfInfo selfPerfInfo;
1348: const char *name = eventRegInfo[(PetscLogEvent) nstEvent].name;
1350: selfPerfInfo.time = selftimes[nstEvent].time ;
1351: selfPerfInfo.flops = selftimes[nstEvent].flops;
1352: selfPerfInfo.numMessages = selftimes[nstEvent].numMessages;
1353: selfPerfInfo.messageLength = selftimes[nstEvent].messageLength;
1354: selfPerfInfo.numReductions = selftimes[nstEvent].numReductions;
1355:
1356: PetscLogPrintNestedLine(viewer, selfPerfInfo, dum_count, dum_parentcount, dum_depth, name, totalTime, &wasPrinted);
1357: if (wasPrinted){
1358: PetscViewerXMLEndSection(viewer, "event");
1359: }
1360: }
1361: }
1362: PetscViewerXMLEndSection(viewer, "selftimertable");
1363: PetscFree(sortSelfTimes);
1364: return(0);
1365: }
1369: PetscErrorCode PetscLogView_Nested(PetscViewer viewer)
1370: {
1371: MPI_Comm comm;
1372: PetscErrorCode ierr;
1373: PetscLogDouble locTotalTime, globTotalTime;
1374: PetscNestedEventTree *tree = NULL;
1375: PetscSelfTimer *selftimers = NULL;
1376: int nTimers = 0, nstMax = 0;
1377: PetscViewerType vType;
1380: PetscViewerGetType(viewer,&vType);
1382: /* Set useXMLFormat that controls the format in all local PetscPrint.. functions */
1383: PetscViewerInitASCII_XML(viewer);
1385: PetscObjectGetComm((PetscObject)viewer,&comm);
1387: PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n");
1388: PetscViewerXMLStartSection(viewer, "petscroot", NULL);
1390: /* Get the total elapsed time, local and global maximum */
1391: PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime;
1392: MPIU_Allreduce(&locTotalTime, &globTotalTime, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1394: /* Print global information about this run */
1395: PetscPrintExeSpecs(viewer);
1396: PetscPrintGlobalPerformance(viewer, locTotalTime);
1397:
1398: /* Collect nested timer tree info from all processes */
1399: PetscCreateLogTreeNested(viewer, &tree, &nTimers);
1400: PetscLogNestedPrintTop(viewer, tree, nTimers, globTotalTime);
1401: PetscLogFreeNestedTree(tree, nTimers);
1403: /* Calculate self-time for all (not-nested) events */
1404: PetscCalcSelfTime(viewer, &selftimers, &nstMax);
1405: PetscPrintSelfTime(viewer, selftimers, nstMax, globTotalTime);
1406: PetscFree(selftimers);
1408: PetscViewerXMLEndSection(viewer, "petscroot");
1409: PetscViewerFinalASCII_XML(viewer);
1410: PetscLogNestedEnd();
1411: return(0);
1412: }
1414: #endif