Actual source code: plog.c
petsc-3.3-p7 2013-05-11
2: /*
3: PETSc code to log object creation and destruction and PETSc events.
5: This provides the public API used by the rest of PETSc and by users.
7: These routines use a private API that is not used elsewhere in PETSc and is not
8: accessible to users. The private API is defined in logimpl.h and the utils directory.
10: */
11: #include <petscsys.h> /*I "petscsys.h" I*/
12: #include <petsctime.h>
13: #if defined(PETSC_HAVE_MPE)
14: #include <mpe.h>
15: #endif
16: #include <stdarg.h>
17: #include <sys/types.h>
18: #if defined(PETSC_HAVE_STDLIB_H)
19: #include <stdlib.h>
20: #endif
21: #if defined(PETSC_HAVE_MALLOC_H)
22: #include <malloc.h>
23: #endif
24: #include <petsc-private/logimpl.h>
25: #if defined(PETSC_THREADCOMM_ACTIVE)
26: #include <petscthreadcomm.h>
27: #endif
29: PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT;
31: #if defined(PETSC_CLANGUAGE_CXX) && !defined(PETSC_USE_EXTERN_CXX)
32: std::map<std::string,PETSc::LogEvent> PETSc::Log::event_registry;
33: std::map<std::string,PETSc::LogStage> PETSc::Log::stage_registry;
34: #endif
36: #if defined(PETSC_USE_LOG)
37: #include <petscmachineinfo.h>
38: #include <petscconfiginfo.h>
40: /* used in the MPI_XXX() count macros in petsclog.h */
42: /* Action and object logging variables */
43: Action *petsc_actions = PETSC_NULL;
44: Object *petsc_objects = PETSC_NULL;
45: PetscBool petsc_logActions = PETSC_FALSE;
46: PetscBool petsc_logObjects = PETSC_FALSE;
47: int petsc_numActions = 0, petsc_maxActions = 100;
48: int petsc_numObjects = 0, petsc_maxObjects = 100;
49: int petsc_numObjectsDestroyed = 0;
51: /* Global counters */
52: PetscLogDouble petsc_BaseTime = 0.0;
53: PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */
54: PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */
55: PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */
56: PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */
57: PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */
58: PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */
59: PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */
60: PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */
61: PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */
62: PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */
63: PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */
64: PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */
65: PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */
66: PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */
67: PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */
68: PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */
69: PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */
71: /* Logging functions */
72: PetscErrorCode (*PetscLogPHC)(PetscObject) = PETSC_NULL;
73: PetscErrorCode (*PetscLogPHD)(PetscObject) = PETSC_NULL;
74: PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
75: PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = PETSC_NULL;
77: /* Tracing event logging variables */
78: FILE *petsc_tracefile = PETSC_NULL;
79: int petsc_tracelevel = 0;
80: const char *petsc_traceblanks = " ";
81: char petsc_tracespace[128] = " ";
82: PetscLogDouble petsc_tracetime = 0.0;
83: static PetscBool PetscLogBegin_PrivateCalled = PETSC_FALSE;
85: /*---------------------------------------------- General Functions --------------------------------------------------*/
88: /*@C
89: PetscLogDestroy - Destroys the object and event logging data and resets the global counters.
91: Not Collective
93: Notes:
94: This routine should not usually be used by programmers. Instead employ
95: PetscLogStagePush() and PetscLogStagePop().
97: Level: developer
99: .keywords: log, destroy
100: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogStagePush(), PlogStagePop()
101: @*/
102: PetscErrorCode PetscLogDestroy(void)
103: {
104: PetscStageLog stageLog;
108: PetscFree(petsc_actions);
109: PetscFree(petsc_objects);
110: PetscLogSet(PETSC_NULL, PETSC_NULL);
112: /* Resetting phase */
113: PetscLogGetStageLog(&stageLog);
114: PetscStageLogDestroy(stageLog);
115: petsc_TotalFlops = 0.0;
116: petsc_numActions = 0;
117: petsc_numObjects = 0;
118: petsc_numObjectsDestroyed = 0;
119: petsc_maxActions = 100;
120: petsc_maxObjects = 100;
121: petsc_actions = PETSC_NULL;
122: petsc_objects = PETSC_NULL;
123: petsc_logActions = PETSC_FALSE;
124: petsc_logObjects = PETSC_FALSE;
125: petsc_BaseTime = 0.0;
126: petsc_TotalFlops = 0.0;
127: petsc_tmp_flops = 0.0;
128: petsc_send_ct = 0.0;
129: petsc_recv_ct = 0.0;
130: petsc_send_len = 0.0;
131: petsc_recv_len = 0.0;
132: petsc_isend_ct = 0.0;
133: petsc_irecv_ct = 0.0;
134: petsc_isend_len = 0.0;
135: petsc_irecv_len = 0.0;
136: petsc_wait_ct = 0.0;
137: petsc_wait_any_ct = 0.0;
138: petsc_wait_all_ct = 0.0;
139: petsc_sum_of_waits_ct = 0.0;
140: petsc_allreduce_ct = 0.0;
141: petsc_gather_ct = 0.0;
142: petsc_scatter_ct = 0.0;
143: PETSC_LARGEST_EVENT = PETSC_EVENT;
144: PetscLogPHC = PETSC_NULL;
145: PetscLogPHD = PETSC_NULL;
146: petsc_tracefile = PETSC_NULL;
147: petsc_tracelevel = 0;
148: petsc_traceblanks = " ";
149: petsc_tracespace[0] = ' '; petsc_tracespace[1] = 0;
150: petsc_tracetime = 0.0;
151: PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
152: PETSC_OBJECT_CLASSID = 0;
153: petsc_stageLog = 0;
154: PetscLogBegin_PrivateCalled = PETSC_FALSE;
155: return(0);
156: }
160: /*@C
161: PetscLogSet - Sets the logging functions called at the beginning and ending of every event.
163: Not Collective
165: Input Parameters:
166: + b - The function called at beginning of event
167: - e - The function called at end of event
169: Level: developer
171: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogAllBegin(), PetscLogTraceBegin()
172: @*/
173: PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject),
174: PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject))
175: {
177: PetscLogPLB = b;
178: PetscLogPLE = e;
179: return(0);
180: }
182: #if defined(PETSC_HAVE_CHUD)
183: #include <CHUD/CHUD.h>
184: #endif
185: #if defined(PETSC_HAVE_PAPI)
186: #include <papi.h>
187: int PAPIEventSet = PAPI_NULL;
188: #endif
190: /*------------------------------------------- Initialization Functions ----------------------------------------------*/
193: PetscErrorCode PetscLogBegin_Private(void)
194: {
195: int stage;
196: PetscBool opt;
197: PetscErrorCode ierr;
200: if (PetscLogBegin_PrivateCalled) return(0);
201: PetscLogBegin_PrivateCalled = PETSC_TRUE;
203: PetscOptionsHasName(PETSC_NULL, "-log_exclude_actions", &opt);
204: if (opt) {
205: petsc_logActions = PETSC_FALSE;
206: }
207: PetscOptionsHasName(PETSC_NULL, "-log_exclude_objects", &opt);
208: if (opt) {
209: petsc_logObjects = PETSC_FALSE;
210: }
211: if (petsc_logActions) {
212: PetscMalloc(petsc_maxActions * sizeof(Action), &petsc_actions);
213: }
214: if (petsc_logObjects) {
215: PetscMalloc(petsc_maxObjects * sizeof(Object), &petsc_objects);
216: }
217: PetscLogPHC = PetscLogObjCreateDefault;
218: PetscLogPHD = PetscLogObjDestroyDefault;
219: /* Setup default logging structures */
220: PetscStageLogCreate(&petsc_stageLog);
221: PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage);
222: #if defined(PETSC_HAVE_CHUD)
223: chudInitialize();
224: chudAcquireSamplingFacility(CHUD_BLOCKING);
225: chudSetSamplingDevice(chudCPU1Dev);
226: chudSetStartDelay(0,chudNanoSeconds);
227: chudClearPMCMode(chudCPU1Dev,chudUnused);
228: chudClearPMCs();
229: /* chudSetPMCMuxPosition(chudCPU1Dev,0,0); */
230: printf("%s\n",chudGetEventName(chudCPU1Dev,PMC_1,193));
231: printf("%s\n",chudGetEventDescription(chudCPU1Dev,PMC_1,193));
232: printf("%s\n",chudGetEventNotes(chudCPU1Dev,PMC_1,193));
233: chudSetPMCEvent(chudCPU1Dev,PMC_1,193);
234: chudSetPMCMode(chudCPU1Dev,PMC_1,chudCounter);
235: chudSetPrivilegeFilter(chudCPU1Dev,PMC_1,chudCountUserEvents);
236: chudSetPMCEventMask(chudCPU1Dev,PMC_1,0xFE);
237: if (!chudIsEventValid(chudCPU1Dev,PMC_1,193)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Event is not valid %d",193);
238: chudStartPMCs();
239: #endif
240: #if defined(PETSC_HAVE_PAPI)
241: PAPI_library_init(PAPI_VER_CURRENT);
242: if (ierr != PAPI_VER_CURRENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Cannot initialize PAPI");
243: PAPI_query_event(PAPI_FP_INS);
244: PAPI_create_eventset(&PAPIEventSet);
245: PAPI_add_event(PAPIEventSet,PAPI_FP_INS);
246: PAPI_start(PAPIEventSet);
247: #endif
249: /* All processors sync here for more consistent logging */
250: MPI_Barrier(PETSC_COMM_WORLD);
251: PetscTime(petsc_BaseTime);
252: PetscLogStagePush(stage);
253: return(0);
254: }
258: /*@C
259: PetscLogBegin - Turns on logging of objects and events. This logs flop
260: rates and object creation and should not slow programs down too much.
261: This routine may be called more than once.
263: Logically Collective over PETSC_COMM_WORLD
265: Options Database Keys:
266: + -log_summary - Prints summary of flop and timing information to the
267: screen (for code compiled with PETSC_USE_LOG)
268: - -log - Prints detailed log information (for code compiled with PETSC_USE_LOG)
270: Usage:
271: .vb
272: PetscInitialize(...);
273: PetscLogBegin();
274: ... code ...
275: PetscLogView(viewer); or PetscLogDump();
276: PetscFinalize();
277: .ve
279: Notes:
280: PetscLogView(viewer) or PetscLogDump() actually cause the printing of
281: the logging information.
283: Level: advanced
285: .keywords: log, begin
286: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogTraceBegin()
287: @*/
288: PetscErrorCode PetscLogBegin(void)
289: {
293: PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault);
294: PetscLogBegin_Private();
295: return(0);
296: }
300: /*@C
301: PetscLogAllBegin - Turns on extensive logging of objects and events. Logs
302: all events. This creates large log files and slows the program down.
304: Logically Collective on PETSC_COMM_WORLD
306: Options Database Keys:
307: . -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
309: Usage:
310: .vb
311: PetscInitialize(...);
312: PetscLogAllBegin();
313: ... code ...
314: PetscLogDump(filename);
315: PetscFinalize();
316: .ve
318: Notes:
319: A related routine is PetscLogBegin (with the options key -log), which is
320: intended for production runs since it logs only flop rates and object
321: creation (and shouldn't significantly slow the programs).
323: Level: advanced
325: .keywords: log, all, begin
326: .seealso: PetscLogDump(), PetscLogBegin(), PetscLogTraceBegin()
327: @*/
328: PetscErrorCode PetscLogAllBegin(void)
329: {
333: PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete);
334: PetscLogBegin_Private();
335: return(0);
336: }
340: /*@
341: PetscLogTraceBegin - Activates trace logging. Every time a PETSc event
342: begins or ends, the event name is printed.
344: Logically Collective on PETSC_COMM_WORLD
346: Input Parameter:
347: . file - The file to print trace in (e.g. stdout)
349: Options Database Key:
350: . -log_trace [filename] - Activates PetscLogTraceBegin()
352: Notes:
353: PetscLogTraceBegin() prints the processor number, the execution time (sec),
354: then "Event begin:" or "Event end:" followed by the event name.
356: PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful
357: to determine where a program is hanging without running in the
358: debugger. Can be used in conjunction with the -info option.
360: Level: intermediate
362: .seealso: PetscLogDump(), PetscLogAllBegin(), PetscLogView(), PetscLogBegin()
363: @*/
364: PetscErrorCode PetscLogTraceBegin(FILE *file)
365: {
369: petsc_tracefile = file;
370: PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace);
371: PetscLogBegin_Private();
372: return(0);
373: }
377: /*@
378: PetscLogActions - Determines whether actions are logged for the graphical viewer.
380: Not Collective
382: Input Parameter:
383: . flag - PETSC_TRUE if actions are to be logged
385: Level: intermediate
387: Note: Logging of actions continues to consume more memory as the program
388: runs. Long running programs should consider turning this feature off.
390: Options Database Keys:
391: . -log_exclude_actions - Turns off actions logging
393: .keywords: log, stage, register
394: .seealso: PetscLogStagePush(), PetscLogStagePop()
395: @*/
396: PetscErrorCode PetscLogActions(PetscBool flag)
397: {
399: petsc_logActions = flag;
400: return(0);
401: }
405: /*@
406: PetscLogObjects - Determines whether objects are logged for the graphical viewer.
408: Not Collective
410: Input Parameter:
411: . flag - PETSC_TRUE if objects are to be logged
413: Level: intermediate
415: Note: Logging of objects continues to consume more memory as the program
416: runs. Long running programs should consider turning this feature off.
418: Options Database Keys:
419: . -log_exclude_objects - Turns off objects logging
421: .keywords: log, stage, register
422: .seealso: PetscLogStagePush(), PetscLogStagePop()
423: @*/
424: PetscErrorCode PetscLogObjects(PetscBool flag)
425: {
427: petsc_logObjects = flag;
428: return(0);
429: }
431: /*------------------------------------------------ Stage Functions --------------------------------------------------*/
434: /*@C
435: PetscLogStageRegister - Attaches a charactor string name to a logging stage.
437: Not Collective
439: Input Parameter:
440: . sname - The name to associate with that stage
442: Output Parameter:
443: . stage - The stage number
445: Level: intermediate
447: .keywords: log, stage, register
448: .seealso: PetscLogStagePush(), PetscLogStagePop()
449: @*/
450: PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage)
451: {
452: PetscStageLog stageLog;
453: PetscLogEvent event;
457: PetscLogGetStageLog(&stageLog);
458: PetscStageLogRegister(stageLog, sname, stage);
459: /* Copy events already changed in the main stage, this sucks */
460: EventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents);
461: for (event = 0; event < stageLog->eventLog->numEvents; event++) {
462: EventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event]);
463: }
464: ClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses);
465: return(0);
466: }
470: /*@C
471: PetscLogStagePush - This function pushes a stage on the stack.
473: Not Collective
475: Input Parameter:
476: . stage - The stage on which to log
478: Usage:
479: If the option -log_sumary is used to run the program containing the
480: following code, then 2 sets of summary data will be printed during
481: PetscFinalize().
482: .vb
483: PetscInitialize(int *argc,char ***args,0,0);
484: [stage 0 of code]
485: PetscLogStagePush(1);
486: [stage 1 of code]
487: PetscLogStagePop();
488: PetscBarrier(...);
489: [more stage 0 of code]
490: PetscFinalize();
491: .ve
492:
493: Notes:
494: Use PetscLogStageRegister() to register a stage.
496: Level: intermediate
498: .keywords: log, push, stage
499: .seealso: PetscLogStagePop(), PetscLogStageRegister(), PetscBarrier()
500: @*/
501: PetscErrorCode PetscLogStagePush(PetscLogStage stage)
502: {
503: PetscStageLog stageLog;
507: PetscLogGetStageLog(&stageLog);
508: PetscStageLogPush(stageLog, stage);
509: return(0);
510: }
514: /*@C
515: PetscLogStagePop - This function pops a stage from the stack.
517: Not Collective
519: Usage:
520: If the option -log_sumary is used to run the program containing the
521: following code, then 2 sets of summary data will be printed during
522: PetscFinalize().
523: .vb
524: PetscInitialize(int *argc,char ***args,0,0);
525: [stage 0 of code]
526: PetscLogStagePush(1);
527: [stage 1 of code]
528: PetscLogStagePop();
529: PetscBarrier(...);
530: [more stage 0 of code]
531: PetscFinalize();
532: .ve
534: Notes:
535: Use PetscLogStageRegister() to register a stage.
537: Level: intermediate
539: .keywords: log, pop, stage
540: .seealso: PetscLogStagePush(), PetscLogStageRegister(), PetscBarrier()
541: @*/
542: PetscErrorCode PetscLogStagePop(void)
543: {
544: PetscStageLog stageLog;
548: PetscLogGetStageLog(&stageLog);
549: PetscStageLogPop(stageLog);
550: return(0);
551: }
555: /*@
556: PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd().
558: Not Collective
560: Input Parameters:
561: + stage - The stage
562: - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
564: Level: intermediate
566: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
567: @*/
568: PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive)
569: {
570: PetscStageLog stageLog;
574: PetscLogGetStageLog(&stageLog);
575: PetscStageLogSetActive(stageLog, stage, isActive);
576: return(0);
577: }
581: /*@
582: PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd().
584: Not Collective
586: Input Parameter:
587: . stage - The stage
589: Output Parameter:
590: . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE)
592: Level: intermediate
594: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
595: @*/
596: PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive)
597: {
598: PetscStageLog stageLog;
602: PetscLogGetStageLog(&stageLog);
603: PetscStageLogGetActive(stageLog, stage, isActive);
604: return(0);
605: }
609: /*@
610: PetscLogStageSetVisible - Determines stage visibility in PetscLogView()
612: Not Collective
614: Input Parameters:
615: + stage - The stage
616: - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
618: Level: intermediate
620: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
621: @*/
622: PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible)
623: {
624: PetscStageLog stageLog;
628: PetscLogGetStageLog(&stageLog);
629: PetscStageLogSetVisible(stageLog, stage, isVisible);
630: return(0);
631: }
635: /*@
636: PetscLogStageGetVisible - Returns stage visibility in PetscLogView()
638: Not Collective
640: Input Parameter:
641: . stage - The stage
643: Output Parameter:
644: . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE)
646: Level: intermediate
648: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscLogView()
649: @*/
650: PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible)
651: {
652: PetscStageLog stageLog;
656: PetscLogGetStageLog(&stageLog);
657: PetscStageLogGetVisible(stageLog, stage, isVisible);
658: return(0);
659: }
663: /*@C
664: PetscLogStageGetId - Returns the stage id when given the stage name.
666: Not Collective
668: Input Parameter:
669: . name - The stage name
671: Output Parameter:
672: . stage - The stage
674: Level: intermediate
676: .seealso: PetscLogStagePush(), PetscLogStagePop(), PetscPreLoadBegin(), PetscPreLoadEnd(), PetscPreLoadStage()
677: @*/
678: PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage)
679: {
680: PetscStageLog stageLog;
684: PetscLogGetStageLog(&stageLog);
685: PetscStageLogGetStage(stageLog, name, stage);
686: return(0);
687: }
689: /*------------------------------------------------ Event Functions --------------------------------------------------*/
692: /*@C
693: PetscLogEventRegister - Registers an event name for logging operations in an application code.
695: Not Collective
697: Input Parameter:
698: + name - The name associated with the event
699: - classid - The classid associated to the class for this event, obtain either with
700: PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones
701: are only available in C code
702:
703: Output Parameter:
704: . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd().
706: Example of Usage:
707: .vb
708: PetscLogEvent USER_EVENT;
709: PetscClassId classid;
710: PetscLogDouble user_event_flops;
711: PetscClassIdRegister("class name",&classid);
712: PetscLogEventRegister("User event name",classid,&USER_EVENT);
713: PetscLogEventBegin(USER_EVENT,0,0,0,0);
714: [code segment to monitor]
715: PetscLogFlops(user_event_flops);
716: PetscLogEventEnd(USER_EVENT,0,0,0,0);
717: .ve
719: Notes:
720: PETSc automatically logs library events if the code has been
721: compiled with -DPETSC_USE_LOG (which is the default) and -log,
722: -log_summary, or -log_all are specified. PetscLogEventRegister() is
723: intended for logging user events to supplement this PETSc
724: information.
726: PETSc can gather data for use with the utilities Upshot/Nupshot
727: (part of the MPICH distribution). If PETSc has been compiled
728: with flag -DPETSC_HAVE_MPE (MPE is an additional utility within
729: MPICH), the user can employ another command line option, -log_mpe,
730: to create a logfile, "mpe.log", which can be visualized
731: Upshot/Nupshot.
733: The classid is associated with each event so that classes of events
734: can be disabled simultaneously, such as all matrix events. The user
735: can either use an existing classid, such as MAT_CLASSID, or create
736: their own as shown in the example.
738: Level: intermediate
740: .keywords: log, event, register
741: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogFlops(),
742: PetscLogEventMPEActivate(), PetscLogEventMPEDeactivate(),
743: PetscLogEventActivate(), PetscLogEventDeactivate(), PetscClassIdRegister()
744: @*/
745: PetscErrorCode PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event)
746: {
747: PetscStageLog stageLog;
748: int stage;
752: *event = PETSC_DECIDE;
753: PetscLogGetStageLog(&stageLog);
754: EventRegLogRegister(stageLog->eventLog, name, classid, event);
755: for (stage = 0; stage < stageLog->numStages; stage++) {
756: EventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents);
757: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
758: }
759: return(0);
760: }
764: /*@
765: PetscLogEventActivate - Indicates that a particular event should be logged.
767: Not Collective
769: Input Parameter:
770: . event - The event id
772: Usage:
773: .vb
774: PetscLogEventDeactivate(VEC_SetValues);
775: [code where you do not want to log VecSetValues()]
776: PetscLogEventActivate(VEC_SetValues);
777: [code where you do want to log VecSetValues()]
778: .ve
780: Note:
781: The event may be either a pre-defined PETSc event (found in include/petsclog.h)
782: or an event number obtained with PetscLogEventRegister().
784: Level: advanced
786: .keywords: log, event, activate
787: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventDeactivate()
788: @*/
789: PetscErrorCode PetscLogEventActivate(PetscLogEvent event)
790: {
791: PetscStageLog stageLog;
792: int stage;
796: PetscLogGetStageLog(&stageLog);
797: PetscStageLogGetCurrent(stageLog, &stage);
798: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
799: return(0);
800: }
804: /*@
805: PetscLogEventDeactivate - Indicates that a particular event should not be logged.
807: Not Collective
809: Input Parameter:
810: . event - The event id
812: Usage:
813: .vb
814: PetscLogEventDeactivate(VEC_SetValues);
815: [code where you do not want to log VecSetValues()]
816: PetscLogEventActivate(VEC_SetValues);
817: [code where you do want to log VecSetValues()]
818: .ve
820: Note:
821: The event may be either a pre-defined PETSc event (found in
822: include/petsclog.h) or an event number obtained with PetscLogEventRegister()).
824: Level: advanced
826: .keywords: log, event, deactivate
827: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventActivate()
828: @*/
829: PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event)
830: {
831: PetscStageLog stageLog;
832: int stage;
836: PetscLogGetStageLog(&stageLog);
837: PetscStageLogGetCurrent(stageLog, &stage);
838: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
839: return(0);
840: }
844: /*@
845: PetscLogEventSetActiveAll - Sets the event activity in every stage.
847: Not Collective
849: Input Parameters:
850: + event - The event id
851: - isActive - The activity flag determining whether the event is logged
853: Level: advanced
855: .keywords: log, event, activate
856: .seealso: PetscLogEventMPEDeactivate(),PetscLogEventMPEActivate(),PlogEventActivate(),PlogEventDeactivate()
857: @*/
858: PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive)
859: {
860: PetscStageLog stageLog;
861: int stage;
865: PetscLogGetStageLog(&stageLog);
866: for(stage = 0; stage < stageLog->numStages; stage++) {
867: if (isActive) {
868: EventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event);
869: } else {
870: EventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event);
871: }
872: }
873: return(0);
874: }
878: /*@
879: PetscLogEventActivateClass - Activates event logging for a PETSc object class.
881: Not Collective
883: Input Parameter:
884: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.
886: Level: developer
888: .keywords: log, event, activate, class
889: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventDeactivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
890: @*/
891: PetscErrorCode PetscLogEventActivateClass(PetscClassId classid)
892: {
893: PetscStageLog stageLog;
894: int stage;
898: PetscLogGetStageLog(&stageLog);
899: PetscStageLogGetCurrent(stageLog, &stage);
900: EventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
901: return(0);
902: }
906: /*@
907: PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class.
909: Not Collective
911: Input Parameter:
912: . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc.
914: Level: developer
916: .keywords: log, event, deactivate, class
917: .seealso: PetscInfoActivate(),PetscInfo(),PetscInfoAllow(),PetscLogEventActivateClass(), PetscLogEventActivate(),PetscLogEventDeactivate()
918: @*/
919: PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid)
920: {
921: PetscStageLog stageLog;
922: int stage;
926: PetscLogGetStageLog(&stageLog);
927: PetscStageLogGetCurrent(stageLog, &stage);
928: EventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid);
929: return(0);
930: }
932: /*MC
933: PetscLogEventBegin - Logs the beginning of a user event.
935: Synopsis:
936: PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
937: PetscObject o4)
939: Not Collective
941: Input Parameters:
942: + e - integer associated with the event obtained from PetscLogEventRegister()
943: - o1,o2,o3,o4 - objects associated with the event, or 0
946: Fortran Synopsis:
947: void PetscLogEventBegin(int e,PetscErrorCode ierr)
949: Usage:
950: .vb
951: PetscLogEvent USER_EVENT;
952: PetscLogDouble user_event_flops;
953: PetscLogEventRegister("User event",0,&USER_EVENT);
954: PetscLogEventBegin(USER_EVENT,0,0,0,0);
955: [code segment to monitor]
956: PetscLogFlops(user_event_flops);
957: PetscLogEventEnd(USER_EVENT,0,0,0,0);
958: .ve
960: Notes:
961: You need to register each integer event with the command
962: PetscLogEventRegister(). The source code must be compiled with
963: -DPETSC_USE_LOG, which is the default.
965: PETSc automatically logs library events if the code has been
966: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
967: specified. PetscLogEventBegin() is intended for logging user events
968: to supplement this PETSc information.
970: Level: intermediate
972: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops()
974: .keywords: log, event, begin
975: M*/
977: /*MC
978: PetscLogEventEnd - Log the end of a user event.
980: Synopsis:
981: PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
982: PetscObject o4)
984: Not Collective
986: Input Parameters:
987: + e - integer associated with the event obtained with PetscLogEventRegister()
988: - o1,o2,o3,o4 - objects associated with the event, or 0
991: Fortran Synopsis:
992: void PetscLogEventEnd(int e,PetscErrorCode ierr)
994: Usage:
995: .vb
996: PetscLogEvent USER_EVENT;
997: PetscLogDouble user_event_flops;
998: PetscLogEventRegister("User event",0,&USER_EVENT,);
999: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1000: [code segment to monitor]
1001: PetscLogFlops(user_event_flops);
1002: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1003: .ve
1005: Notes:
1006: You should also register each additional integer event with the command
1007: PetscLogEventRegister(). Source code must be compiled with
1008: -DPETSC_USE_LOG, which is the default.
1010: PETSc automatically logs library events if the code has been
1011: compiled with -DPETSC_USE_LOG, and -log, -log_summary, or -log_all are
1012: specified. PetscLogEventEnd() is intended for logging user events
1013: to supplement this PETSc information.
1015: Level: intermediate
1017: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogFlops()
1019: .keywords: log, event, end
1020: M*/
1022: /*MC
1023: PetscLogEventBarrierBegin - Logs the time in a barrier before an event.
1025: Synopsis:
1026: PetscErrorCode PetscLogEventBarrierBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1027: PetscObject o4,MPI_Comm comm)
1029: Not Collective
1031: Input Parameters:
1032: . e - integer associated with the event obtained from PetscLogEventRegister()
1033: . o1,o2,o3,o4 - objects associated with the event, or 0
1034: . comm - communicator the barrier takes place over
1037: Usage:
1038: .vb
1039: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1040: MPI_Allreduce()
1041: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1042: .ve
1044: Notes:
1045: This is for logging the amount of time spent in a barrier for an event
1046: that requires synchronization.
1048: Additional Notes:
1049: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1050: VEC_NormComm = VEC_NormBarrier + 1
1052: Level: advanced
1054: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1055: PetscLogEventBarrierEnd()
1057: .keywords: log, event, begin, barrier
1058: M*/
1060: /*MC
1061: PetscLogEventBarrierEnd - Logs the time in a barrier before an event.
1063: Synopsis:
1064: PetscErrorCode PetscLogEventBarrierEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,
1065: PetscObject o4,MPI_Comm comm)
1067: Logically Collective on MPI_Comm
1069: Input Parameters:
1070: . e - integer associated with the event obtained from PetscLogEventRegister()
1071: . o1,o2,o3,o4 - objects associated with the event, or 0
1072: . comm - communicator the barrier takes place over
1075: Usage:
1076: .vb
1077: PetscLogEventBarrierBegin(VEC_NormBarrier,0,0,0,0,comm);
1078: MPI_Allreduce()
1079: PetscLogEventBarrierEnd(VEC_NormBarrier,0,0,0,0,comm);
1080: .ve
1082: Notes:
1083: This is for logging the amount of time spent in a barrier for an event
1084: that requires synchronization.
1086: Additional Notes:
1087: Synchronization events always come in pairs; for example, VEC_NormBarrier and
1088: VEC_NormComm = VEC_NormBarrier + 1
1090: Level: advanced
1092: .seealso: PetscLogEventRegister(), PetscLogEventEnd(), PetscLogFlops(), PetscLogEventBegin(),
1093: PetscLogEventBarrierBegin()
1095: .keywords: log, event, begin, barrier
1096: M*/
1100: /*@C
1101: PetscLogEventGetId - Returns the event id when given the event name.
1103: Not Collective
1105: Input Parameter:
1106: . name - The event name
1108: Output Parameter:
1109: . event - The event
1111: Level: intermediate
1113: .seealso: PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStageGetId()
1114: @*/
1115: PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event)
1116: {
1117: PetscStageLog stageLog;
1121: PetscLogGetStageLog(&stageLog);
1122: EventRegLogGetEvent(stageLog->eventLog, name, event);
1123: return(0);
1124: }
1127: /*------------------------------------------------ Output Functions -------------------------------------------------*/
1130: /*@C
1131: PetscLogDump - Dumps logs of objects to a file. This file is intended to
1132: be read by bin/petscview. This program no longer exists.
1134: Collective on PETSC_COMM_WORLD
1136: Input Parameter:
1137: . name - an optional file name
1139: Options Database Keys:
1140: + -log - Prints basic log information (for code compiled with PETSC_USE_LOG)
1141: - -log_all - Prints extensive log information (for code compiled with PETSC_USE_LOG)
1142:
1143: Usage:
1144: .vb
1145: PetscInitialize(...);
1146: PetscLogBegin(); or PetscLogAllBegin();
1147: ... code ...
1148: PetscLogDump(filename);
1149: PetscFinalize();
1150: .ve
1152: Notes:
1153: The default file name is
1154: $ Log.<rank>
1155: where <rank> is the processor number. If no name is specified,
1156: this file will be used.
1158: Level: advanced
1160: .keywords: log, dump
1161: .seealso: PetscLogBegin(), PetscLogAllBegin(), PetscLogView()
1162: @*/
1163: PetscErrorCode PetscLogDump(const char sname[])
1164: {
1165: PetscStageLog stageLog;
1166: PetscEventPerfInfo *eventInfo;
1167: FILE *fd;
1168: char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
1169: PetscLogDouble flops, _TotalTime;
1170: PetscMPIInt rank;
1171: int action, object, curStage;
1172: PetscLogEvent event;
1173: PetscErrorCode ierr;
1176: /* Calculate the total elapsed time */
1177: PetscTime(_TotalTime);
1178: _TotalTime -= petsc_BaseTime;
1179: /* Open log file */
1180: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
1181: if (sname) {
1182: sprintf(file, "%s.%d", sname, rank);
1183: } else {
1184: sprintf(file, "Log.%d", rank);
1185: }
1186: PetscFixFilename(file, fname);
1187: PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd);
1188: if ((!rank) && (!fd)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
1189: /* Output totals */
1190: PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flops %14e %16.8e\n", petsc_TotalFlops, _TotalTime);
1191: PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0);
1192: /* Output actions */
1193: if (petsc_logActions) {
1194: PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions);
1195: for(action = 0; action < petsc_numActions; action++) {
1196: PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n",
1197: petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1,
1198: petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem);
1199: }
1200: }
1201: /* Output objects */
1202: if (petsc_logObjects) {
1203: PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed);
1204: for(object = 0; object < petsc_numObjects; object++) {
1205: PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int) petsc_objects[object].mem);
1206: if (!petsc_objects[object].name[0]) {
1207: PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n");
1208: } else {
1209: PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name);
1210: }
1211: if (petsc_objects[object].info[0] != 0) {
1212: PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n");
1213: } else {
1214: PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info);
1215: }
1216: }
1217: }
1218: /* Output events */
1219: PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n");
1220: PetscLogGetStageLog(&stageLog);
1221: PetscIntStackTop(stageLog->stack, &curStage);
1222: eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo;
1223: for(event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) {
1224: if (eventInfo[event].time != 0.0) {
1225: flops = eventInfo[event].flops/eventInfo[event].time;
1226: } else {
1227: flops = 0.0;
1228: }
1229: PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,
1230: eventInfo[event].flops, eventInfo[event].time, flops);
1231: }
1232: PetscFClose(PETSC_COMM_WORLD, fd);
1233: return(0);
1234: }
1238: /*@C
1239: PetscLogViewer - Prints a summary of the logging.
1241: Collective over MPI_Comm
1243: Input Parameter:
1244: . viewer - an ASCII viewer
1246: Options Database Keys:
1247: . -log_summary - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1249: Usage:
1250: .vb
1251: PetscInitialize(...);
1252: PetscLogBegin();
1253: ... code ...
1254: PetscLogView(PetscViewer);
1255: PetscFinalize(...);
1256: .ve
1258: Notes:
1259: By default the summary is printed to stdout.
1261: Level: beginner
1262:
1263: .keywords: log, dump, print
1264: .seealso: PetscLogBegin(), PetscLogDump()
1265: @*/
1266: PetscErrorCode PetscLogView(PetscViewer viewer)
1267: {
1268: FILE *fd;
1269: PetscLogDouble zero = 0.0;
1270: PetscStageLog stageLog;
1271: PetscStageInfo *stageInfo = PETSC_NULL;
1272: PetscEventPerfInfo *eventInfo = PETSC_NULL;
1273: PetscClassPerfInfo *classInfo;
1274: char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128];
1275: const char *name;
1276: PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1277: PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1278: PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1279: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1280: PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1281: PetscLogDouble min, max, tot, ratio, avg, x, y;
1282: PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratCt, totm, totml, totr;
1283: PetscMPIInt minCt, maxCt;
1284: PetscMPIInt size, rank;
1285: PetscBool *localStageUsed, *stageUsed;
1286: PetscBool *localStageVisible, *stageVisible;
1287: int numStages, localNumEvents, numEvents;
1288: int stage, lastStage, oclass;
1289: PetscLogEvent event;
1290: PetscErrorCode ierr;
1291: char version[256];
1292: MPI_Comm comm;
1293: #if defined(PETSC_THREADCOMM_ACTIVE)
1294: PetscInt nthreads;
1295: #endif
1298: PetscObjectGetComm((PetscObject)viewer,&comm);
1299: PetscViewerASCIIGetPointer(viewer,&fd);
1300: MPI_Comm_size(comm, &size);
1301: MPI_Comm_rank(comm, &rank);
1302: /* Pop off any stages the user forgot to remove */
1303: lastStage = 0;
1304: PetscLogGetStageLog(&stageLog);
1305: PetscStageLogGetCurrent(stageLog, &stage);
1306: while (stage >= 0) {
1307: lastStage = stage;
1308: PetscStageLogPop(stageLog);
1309: PetscStageLogGetCurrent(stageLog, &stage);
1310: }
1311: /* Get the total elapsed time */
1312: PetscTime(locTotalTime); locTotalTime -= petsc_BaseTime;
1314: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1315: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1316: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1317: PetscFPrintf(comm, fd, "\n---------------------------------------------- PETSc Performance Summary: ----------------------------------------------\n\n");
1318: PetscGetArchType(arch,sizeof(arch));
1319: PetscGetHostName(hostname,sizeof(hostname));
1320: PetscGetUserName(username,sizeof(username));
1321: PetscGetProgramName(pname,sizeof(pname));
1322: PetscGetDate(date,sizeof(date));
1323: PetscGetVersion(version,sizeof(version));
1324: if (size == 1) {
1325: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date);
1326: } else {
1327: PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date);
1328: }
1329: #if defined(PETSC_THREADCOMM_ACTIVE)
1330: PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&nthreads);
1331: if (nthreads > 1) {
1332: PetscFPrintf(comm,fd,"With %d threads per MPI_Comm\n", (int)nthreads);
1333: }
1334: #endif
1335:
1336: PetscFPrintf(comm, fd, "Using %s\n", version);
1338: /* Must preserve reduction count before we go on */
1339: red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1341: /* Calculate summary information */
1342: PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total \n");
1343: /* Time */
1344: MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1345: MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1346: MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1347: avg = (tot)/((PetscLogDouble) size);
1348: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1349: PetscFPrintf(comm, fd, "Time (sec): %5.3e %10.5f %5.3e\n", max, ratio, avg);
1350: TotalTime = tot;
1351: /* Objects */
1352: avg = (PetscLogDouble) petsc_numObjects;
1353: MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1354: MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1355: MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1356: avg = (tot)/((PetscLogDouble) size);
1357: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1358: PetscFPrintf(comm, fd, "Objects: %5.3e %10.5f %5.3e\n", max, ratio, avg);
1359: /* Flops */
1360: MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1361: MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1362: MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1363: avg = (tot)/((PetscLogDouble) size);
1364: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1365: PetscFPrintf(comm, fd, "Flops: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1366: TotalFlops = tot;
1367: /* Flops/sec -- Must talk to Barry here */
1368: if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime; else flops = 0.0;
1369: MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1370: MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1371: MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1372: avg = (tot)/((PetscLogDouble) size);
1373: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1374: PetscFPrintf(comm, fd, "Flops/sec: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1375: /* Memory */
1376: PetscMallocGetMaximumUsage(&mem);
1377: if (mem > 0.0) {
1378: MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1379: MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1380: MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1381: avg = (tot)/((PetscLogDouble) size);
1382: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1383: PetscFPrintf(comm, fd, "Memory: %5.3e %10.5f %5.3e\n", max, ratio, tot);
1384: }
1385: /* Messages */
1386: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1387: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1388: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1389: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1390: avg = (tot)/((PetscLogDouble) size);
1391: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1392: PetscFPrintf(comm, fd, "MPI Messages: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1393: numMessages = tot;
1394: /* Message Lengths */
1395: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1396: MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1397: MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1398: MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1399: if (numMessages != 0) avg = (tot)/(numMessages); else avg = 0.0;
1400: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1401: PetscFPrintf(comm, fd, "MPI Message Lengths: %5.3e %10.5f %5.3e %5.3e\n", max, ratio, avg, tot);
1402: messageLength = tot;
1403: /* Reductions */
1404: MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1405: MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1406: MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1407: if (min != 0.0) ratio = max/min; else ratio = 0.0;
1408: PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %10.5f\n", max, ratio);
1409: numReductions = red; /* wrong because uses count from process zero */
1410: PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n");
1411: PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n");
1412: PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n");
1414: /* Get total number of stages --
1415: Currently, a single processor can register more stages than another, but stages must all be registered in order.
1416: We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
1417: This seems best accomplished by assoicating a communicator with each stage.
1418: */
1419: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
1420: PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
1421: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1422: PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
1423: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1424: if (numStages > 0) {
1425: stageInfo = stageLog->stageInfo;
1426: for(stage = 0; stage < numStages; stage++) {
1427: if (stage < stageLog->numStages) {
1428: localStageUsed[stage] = stageInfo[stage].used;
1429: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
1430: } else {
1431: localStageUsed[stage] = PETSC_FALSE;
1432: localStageVisible[stage] = PETSC_TRUE;
1433: }
1434: }
1435: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);
1436: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
1437: for(stage = 0; stage < numStages; stage++) {
1438: if (stageUsed[stage]) {
1439: PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");
1440: PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total counts %%Total Avg %%Total counts %%Total \n");
1441: break;
1442: }
1443: }
1444: for(stage = 0; stage < numStages; stage++) {
1445: if (!stageUsed[stage]) continue;
1446: if (localStageUsed[stage]) {
1447: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1448: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1449: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1450: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1451: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1452: name = stageInfo[stage].name;
1453: } else {
1454: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1455: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1456: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1457: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1458: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1459: name = "";
1460: }
1461: mess *= 0.5; messLen *= 0.5; red /= size;
1462: if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0;
1463: if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0;
1464: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1465: if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0;
1466: if (numMessages != 0.0) avgMessLen = messLen/numMessages; else avgMessLen = 0.0;
1467: if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0;
1468: if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0;
1469: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
1470: stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
1471: mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
1472: }
1473: }
1475: PetscFPrintf(comm, fd,
1476: "\n------------------------------------------------------------------------------------------------------------------------\n");
1477:
1478: PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n");
1479: PetscFPrintf(comm, fd, "Phase summary info:\n");
1480: PetscFPrintf(comm, fd, " Count: number of times phase was executed\n");
1481: PetscFPrintf(comm, fd, " Time and Flops: Max - maximum over all processors\n");
1482: PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n");
1483: PetscFPrintf(comm, fd, " Mess: number of messages sent\n");
1484: PetscFPrintf(comm, fd, " Avg. len: average message length\n");
1485: PetscFPrintf(comm, fd, " Reduct: number of global reductions\n");
1486: PetscFPrintf(comm, fd, " Global: entire computation\n");
1487: PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n");
1488: PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flops in this phase\n");
1489: PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n");
1490: PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n");
1491: PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flops over all processors)/(max time over all processors)\n");
1492: PetscFPrintf(comm, fd,
1493: "------------------------------------------------------------------------------------------------------------------------\n");
1494:
1496: #if defined(PETSC_USE_DEBUG)
1497: PetscFPrintf(comm, fd, "\n\n");
1498: PetscFPrintf(comm, fd, " ##########################################################\n");
1499: PetscFPrintf(comm, fd, " # #\n");
1500: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1501: PetscFPrintf(comm, fd, " # #\n");
1502: PetscFPrintf(comm, fd, " # This code was compiled with a debugging option, #\n");
1503: PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n");
1504: PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n");
1505: PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n");
1506: PetscFPrintf(comm, fd, " # #\n");
1507: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1508: #endif
1509: #if defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_FORTRAN_KERNELS)
1510: PetscFPrintf(comm, fd, "\n\n");
1511: PetscFPrintf(comm, fd, " ##########################################################\n");
1512: PetscFPrintf(comm, fd, " # #\n");
1513: PetscFPrintf(comm, fd, " # WARNING!!! #\n");
1514: PetscFPrintf(comm, fd, " # #\n");
1515: PetscFPrintf(comm, fd, " # The code for various complex numbers numerical #\n");
1516: PetscFPrintf(comm, fd, " # kernels uses C++, which generally is not well #\n");
1517: PetscFPrintf(comm, fd, " # optimized. For performance that is about 4-5 times #\n");
1518: PetscFPrintf(comm, fd, " # faster, specify --with-fortran-kernels=1 #\n");
1519: PetscFPrintf(comm, fd, " # when running ./configure.py. #\n");
1520: PetscFPrintf(comm, fd, " # #\n");
1521: PetscFPrintf(comm, fd, " ##########################################################\n\n\n");
1522: #endif
1524: /* Report events */
1525: PetscFPrintf(comm, fd,
1526: "Event Count Time (sec) Flops --- Global --- --- Stage --- Total\n");
1527:
1528: PetscFPrintf(comm, fd,
1529: " Max Ratio Max Ratio Max Ratio Mess Avg len Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s\n");
1530:
1531: PetscFPrintf(comm,fd,
1532: "------------------------------------------------------------------------------------------------------------------------\n");
1534:
1535: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1536: for(stage = 0; stage < numStages; stage++) {
1537: if (!stageVisible[stage]) continue;
1538: if (localStageUsed[stage]) {
1539: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1540: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1541: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1542: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1543: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1544: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1545: } else {
1546: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1547: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1548: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1549: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1550: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1551: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1552: }
1553: mess *= 0.5; messLen *= 0.5; red /= size;
1555: /* Get total number of events in this stage --
1556: Currently, a single processor can register more events than another, but events must all be registered in order,
1557: just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
1558: on the event ID. This seems best accomplished by assoicating a communicator with each stage.
1560: Problem: If the event did not happen on proc 1, its name will not be available.
1561: Problem: Event visibility is not implemented
1562: */
1563: if (localStageUsed[stage]) {
1564: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1565: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1566: } else {
1567: localNumEvents = 0;
1568: }
1569: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
1570: for(event = 0; event < numEvents; event++) {
1571: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
1572: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1573: flopr = eventInfo[event].flops;
1574: } else {
1575: flopr = 0.0;
1576: }
1577: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1578: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1579: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1580: MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1581: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1582: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1583: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1584: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1585: MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1586: MPI_Allreduce(&eventInfo[event].count, &minCt, 1, MPI_INT, MPI_MIN, comm);
1587: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1588: name = stageLog->eventLog->eventInfo[event].name;
1589: } else {
1590: flopr = 0.0;
1591: MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1592: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1593: MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1594: MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm);
1595: MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm);
1596: MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1597: MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1598: MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1599: MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
1600: MPI_Allreduce(&ierr, &minCt, 1, MPI_INT, MPI_MIN, comm);
1601: MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
1602: name = "";
1603: }
1604: if (mint < 0.0) {
1605: PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name);
1606: mint = 0;
1607: }
1608: if (minf < 0.0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flops %g over all processors for %s is negative! Not possible!",minf,name);
1609: totm *= 0.5; totml *= 0.5; totr /= size;
1610:
1611: if (maxCt != 0) {
1612: if (minCt != 0) ratCt = ((PetscLogDouble) maxCt)/minCt; else ratCt = 0.0;
1613: if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0;
1614: if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0;
1615: if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0;
1616: if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0;
1617: if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0;
1618: if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0;
1619: if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0;
1620: if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0;
1621: if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0;
1622: if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0;
1623: if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0;
1624: if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0;
1625: if (totm != 0.0) totml /= totm; else totml = 0.0;
1626: if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0;
1627: PetscFPrintf(comm, fd,
1628: "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f\n",
1629: name, maxCt, ratCt, maxt, ratt, maxf, ratf, totm, totml, totr,
1630: 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed,
1631: 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed,
1632: PetscAbsReal(flopr/1.0e6));
1633: }
1634: }
1635: }
1637: /* Memory usage and object creation */
1638: PetscFPrintf(comm, fd,
1639: "------------------------------------------------------------------------------------------------------------------------\n");
1640: PetscFPrintf(comm, fd, "\n");
1641: PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n");
1643: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1644: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1645: stats for stages local to processor sets.
1646: */
1647: /* We should figure out the longest object name here (now 20 characters) */
1648: PetscFPrintf(comm, fd, "Object Type Creations Destructions Memory Descendants' Mem.\n");
1649: PetscFPrintf(comm, fd, "Reports information only for process 0.\n");
1650: for(stage = 0; stage < numStages; stage++) {
1651: if (localStageUsed[stage]) {
1652: classInfo = stageLog->stageInfo[stage].classLog->classInfo;
1653: PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1654: for(oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) {
1655: if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) {
1656: PetscFPrintf(comm, fd, "%20s %5d %5d %11.0f %g\n", stageLog->classLog->classInfo[oclass].name,
1657: classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem,
1658: classInfo[oclass].descMem);
1659: }
1660: }
1661: } else {
1662: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
1663: }
1664: }
1666: PetscFree(localStageUsed);
1667: PetscFree(stageUsed);
1668: PetscFree(localStageVisible);
1669: PetscFree(stageVisible);
1671: /* Information unrelated to this particular run */
1672: PetscFPrintf(comm, fd,
1673: "========================================================================================================================\n");
1674: PetscTime(y);
1675: PetscTime(x);
1676: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1677: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
1678: PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0);
1679: /* MPI information */
1680: if (size > 1) {
1681: MPI_Status status;
1682: PetscMPIInt tag;
1683: MPI_Comm newcomm;
1685: MPI_Barrier(comm);
1686: PetscTime(x);
1687: MPI_Barrier(comm);
1688: MPI_Barrier(comm);
1689: MPI_Barrier(comm);
1690: MPI_Barrier(comm);
1691: MPI_Barrier(comm);
1692: PetscTime(y);
1693: PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0);
1694: PetscCommDuplicate(comm,&newcomm, &tag);
1695: MPI_Barrier(comm);
1696: if (rank) {
1697: MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);
1698: MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
1699: } else {
1700: PetscTime(x);
1701: MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);
1702: MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
1703: PetscTime(y);
1704: PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size);
1705: }
1706: PetscCommDestroy(&newcomm);
1707: }
1708: PetscOptionsView(viewer);
1710: /* Machine and compile information */
1711: #if defined(PETSC_USE_FORTRAN_KERNELS)
1712: PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n");
1713: #else
1714: PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n");
1715: #endif
1716: #if defined(PETSC_USE_REAL_SINGLE)
1717: PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n");
1718: #elif defined(PETSC_USE_LONGDOUBLE)
1719: PetscFPrintf(comm, fd, "Compiled with long double precision PetscScalar and PetscReal\n");
1720: #endif
1722: #if defined(PETSC_USE_REAL_MAT_SINGLE)
1723: PetscFPrintf(comm, fd, "Compiled with single precision matrices\n");
1724: #else
1725: PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n");
1726: #endif
1727: PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n",
1728: (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar),(int) sizeof(PetscInt));
1730: PetscFPrintf(comm, fd, "Configure run at: %s\n",petscconfigureruntime);
1731: PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions);
1732: PetscFPrintf(comm, fd, "%s", petscmachineinfo);
1733: PetscFPrintf(comm, fd, "%s", petsccompilerinfo);
1734: PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo);
1735: PetscFPrintf(comm, fd, "%s", petsclinkerinfo);
1737: /* Cleanup */
1738: PetscFPrintf(comm, fd, "\n");
1739: PetscStageLogPush(stageLog, lastStage);
1740: return(0);
1741: }
1745: /*@C
1746: PetscLogPrintDetailed - Each process prints the times for its own events
1748: Collective over MPI_Comm
1750: Input Parameter:
1751: + comm - The MPI communicator (only one processor prints output)
1752: - file - [Optional] The output file name
1754: Options Database Keys:
1755: . -log_summary_detailed - Prints summary of log information (for code compiled with PETSC_USE_LOG)
1757: Usage:
1758: .vb
1759: PetscInitialize(...);
1760: PetscLogBegin();
1761: ... code ...
1762: PetscLogPrintDetailed(MPI_Comm,filename);
1763: PetscFinalize(...);
1764: .ve
1766: Notes:
1767: By default the summary is printed to stdout.
1769: Level: beginner
1770:
1771: .keywords: log, dump, print
1772: .seealso: PetscLogBegin(), PetscLogDump(), PetscLogView()
1773: @*/
1774: PetscErrorCode PetscLogPrintDetailed(MPI_Comm comm, const char filename[])
1775: {
1776: FILE *fd = PETSC_STDOUT;
1777: PetscStageLog stageLog;
1778: PetscStageInfo *stageInfo = PETSC_NULL;
1779: PetscEventPerfInfo *eventInfo = PETSC_NULL;
1780: const char *name = PETSC_NULL;
1781: PetscLogDouble TotalTime;
1782: PetscLogDouble stageTime, flops, flopr, mess, messLen, red;
1783: PetscLogDouble maxf, totf, maxt, tott, totm, totml, totr = 0.0;
1784: PetscMPIInt maxCt;
1785: PetscBool *stageUsed;
1786: PetscBool *stageVisible;
1787: int numStages, numEvents;
1788: int stage;
1789: PetscLogEvent event;
1790: PetscErrorCode ierr;
1793: /* Pop off any stages the user forgot to remove */
1794: PetscLogGetStageLog(&stageLog);
1795: PetscStageLogGetCurrent(stageLog, &stage);
1796: while (stage >= 0) {
1797: PetscStageLogPop(stageLog);
1798: PetscStageLogGetCurrent(stageLog, &stage);
1799: }
1800: /* Get the total elapsed time */
1801: PetscTime(TotalTime); TotalTime -= petsc_BaseTime;
1802: /* Open the summary file */
1803: if (filename) {
1804: PetscFOpen(comm, filename, "w", &fd);
1805: }
1807: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1808: PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 120 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n");
1809: PetscFPrintf(comm, fd, "************************************************************************************************************************\n");
1812: numStages = stageLog->numStages;
1813: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
1814: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
1815: if (numStages > 0) {
1816: stageInfo = stageLog->stageInfo;
1817: for(stage = 0; stage < numStages; stage++) {
1818: if (stage < stageLog->numStages) {
1819: stageUsed[stage] = stageInfo[stage].used;
1820: stageVisible[stage] = stageInfo[stage].perfInfo.visible;
1821: } else {
1822: stageUsed[stage] = PETSC_FALSE;
1823: stageVisible[stage] = PETSC_TRUE;
1824: }
1825: }
1826: }
1828: /* Report events */
1829: PetscFPrintf(comm, fd,"Event Count Time (sec) Flops/sec \n");
1830: PetscFPrintf(comm, fd," Mess Avg len Reduct \n");
1831: PetscFPrintf(comm,fd,"-----------------------------------------------------------------------------------\n");
1832: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
1833: for(stage = 0; stage < numStages; stage++) {
1834: if (!stageVisible[stage]) continue;
1835: if (stageUsed[stage]) {
1836: PetscSynchronizedFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name);
1837: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1838: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1839: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1840: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1841: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1842: }
1843: mess *= 0.5; messLen *= 0.5;
1845: /* Get total number of events in this stage --
1846: */
1847: if (stageUsed[stage]) {
1848: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
1849: numEvents = stageLog->stageInfo[stage].eventLog->numEvents;
1850: } else {
1851: numEvents = 0;
1852: }
1853: for(event = 0; event < numEvents; event++) {
1854: if (stageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents)) {
1855: if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) {
1856: flopr = eventInfo[event].flops/eventInfo[event].time;
1857: } else {
1858: flopr = 0.0;
1859: }
1860: MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1861: MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1862: MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PETSC_COMM_SELF);
1863: MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1864: MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1865: MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, PETSC_COMM_SELF);
1866: totr = eventInfo[event].numReductions;
1867: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, PETSC_COMM_SELF);
1868: name = stageLog->eventLog->eventInfo[event].name;
1869: totm *= 0.5; totml *= 0.5;
1870: }
1871:
1872: if (maxCt != 0) {
1873: if (totm != 0.0) totml /= totm; else totml = 0.0;
1874: PetscSynchronizedFPrintf(comm, fd,"%-16s %7d %5.4e %3.2e %2.1e %2.1e %2.1e\n",name, maxCt, maxt, maxf, totm, totml, totr);
1875: }
1876: }
1877: }
1878: PetscSynchronizedFlush(comm);
1880: PetscFree(stageUsed);
1881: PetscFree(stageVisible);
1883: PetscFClose(comm, fd);
1884: return(0);
1885: }
1887: /*----------------------------------------------- Counter Functions -------------------------------------------------*/
1890: /*@C
1891: PetscGetFlops - Returns the number of flops used on this processor
1892: since the program began.
1894: Not Collective
1896: Output Parameter:
1897: flops - number of floating point operations
1899: Notes:
1900: A global counter logs all PETSc flop counts. The user can use
1901: PetscLogFlops() to increment this counter to include flops for the
1902: application code.
1904: PETSc automatically logs library events if the code has been
1905: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1906: -log_summary, or -log_all are specified. PetscLogFlops() is
1907: intended for logging user flops to supplement this PETSc
1908: information.
1910: Level: intermediate
1912: .keywords: log, flops, floating point operations
1914: .seealso: PetscGetTime(), PetscLogFlops()
1915: @*/
1916: PetscErrorCode PetscGetFlops(PetscLogDouble *flops)
1917: {
1919: *flops = petsc_TotalFlops;
1920: return(0);
1921: }
1925: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
1926: {
1928: size_t fullLength;
1929: va_list Argp;
1932: if (!petsc_logObjects) return(0);
1933: va_start(Argp, format);
1934: PetscVSNPrintf(petsc_objects[obj->id].info, 64,format,&fullLength, Argp);
1935: va_end(Argp);
1936: return(0);
1937: }
1940: /*MC
1941: PetscLogFlops - Adds floating point operations to the global counter.
1943: Synopsis:
1944: PetscErrorCode PetscLogFlops(PetscLogDouble f)
1946: Not Collective
1948: Input Parameter:
1949: . f - flop counter
1952: Usage:
1953: .vb
1954: PetscLogEvent USER_EVENT;
1955: PetscLogEventRegister("User event",0,&USER_EVENT);
1956: PetscLogEventBegin(USER_EVENT,0,0,0,0);
1957: [code segment to monitor]
1958: PetscLogFlops(user_flops)
1959: PetscLogEventEnd(USER_EVENT,0,0,0,0);
1960: .ve
1962: Notes:
1963: A global counter logs all PETSc flop counts. The user can use
1964: PetscLogFlops() to increment this counter to include flops for the
1965: application code.
1967: PETSc automatically logs library events if the code has been
1968: compiled with -DPETSC_USE_LOG (which is the default), and -log,
1969: -log_summary, or -log_all are specified. PetscLogFlops() is
1970: intended for logging user flops to supplement this PETSc
1971: information.
1973: Level: intermediate
1975: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscGetFlops()
1977: .keywords: log, flops, floating point operations
1978: M*/
1980: /*MC
1981: PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice)
1982: to get accurate timings
1984: Synopsis:
1985: void PetscPreLoadBegin(PetscBool flag,char *name);
1987: Not Collective
1989: Input Parameter:
1990: + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden
1991: with command line option -preload true or -preload false
1992: - name - name of first stage (lines of code timed separately with -log_summary) to
1993: be preloaded
1995: Usage:
1996: .vb
1997: PetscPreLoadBegin(PETSC_TRUE,"first stage);
1998: lines of code
1999: PetscPreLoadStage("second stage");
2000: lines of code
2001: PetscPreLoadEnd();
2002: .ve
2004: Notes: Only works in C/C++, not Fortran
2006: Flags available within the macro.
2007: + PetscPreLoadingUsed - true if we are or have done preloading
2008: . PetscPreLoadingOn - true if it is CURRENTLY doing preload
2009: . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second
2010: - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on
2011: The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin()
2012: and PetscPreLoadEnd()
2014: Level: intermediate
2016: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadEnd(), PetscPreLoadStage()
2018: Concepts: preloading
2019: Concepts: timing^accurate
2020: Concepts: paging^eliminating effects of
2023: M*/
2025: /*MC
2026: PetscPreLoadEnd - End a segment of code that may be preloaded (run twice)
2027: to get accurate timings
2029: Synopsis:
2030: void PetscPreLoadEnd(void);
2032: Not Collective
2034: Usage:
2035: .vb
2036: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2037: lines of code
2038: PetscPreLoadStage("second stage");
2039: lines of code
2040: PetscPreLoadEnd();
2041: .ve
2043: Notes: only works in C/C++ not fortran
2045: Level: intermediate
2047: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadStage()
2049: M*/
2051: /*MC
2052: PetscPreLoadStage - Start a new segment of code to be timed separately.
2053: to get accurate timings
2055: Synopsis:
2056: void PetscPreLoadStage(char *name);
2058: Not Collective
2060: Usage:
2061: .vb
2062: PetscPreLoadBegin(PETSC_TRUE,"first stage);
2063: lines of code
2064: PetscPreLoadStage("second stage");
2065: lines of code
2066: PetscPreLoadEnd();
2067: .ve
2069: Notes: only works in C/C++ not fortran
2071: Level: intermediate
2073: .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscPreLoadBegin(), PetscPreLoadEnd()
2075: M*/
2077: #include <../src/sys/viewer/impls/ascii/asciiimpl.h> /*I "petscsys.h" I*/
2080: /*@
2081: PetscLogViewPython - Saves logging information in a Python format.
2083: Collective on PetscViewer
2085: Input Paramter:
2086: . viewer - viewer to save Python data
2088: Level: intermediate
2090: @*/
2091: PetscErrorCode PetscLogViewPython(PetscViewer viewer)
2092: {
2093: PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data;
2094: FILE *fd = ascii->fd;
2095: PetscLogDouble zero = 0.0;
2096: PetscStageLog stageLog;
2097: PetscStageInfo *stageInfo = PETSC_NULL;
2098: PetscEventPerfInfo *eventInfo = PETSC_NULL;
2099: const char *name;
2100: char stageName[2048];
2101: char eventName[2048];
2102: PetscLogDouble locTotalTime, TotalTime = 0, TotalFlops = 0;
2103: PetscLogDouble numMessages = 0, messageLength = 0, avgMessLen, numReductions = 0;
2104: PetscLogDouble stageTime, flops, mem, mess, messLen, red;
2105: PetscLogDouble fracTime, fracFlops, fracMessages, fracLength;
2106: PetscLogDouble fracReductions;
2107: PetscLogDouble tot,avg,x,y,*mydata;
2108: PetscMPIInt maxCt;
2109: PetscMPIInt size, rank, *mycount;
2110: PetscBool *localStageUsed, *stageUsed;
2111: PetscBool *localStageVisible, *stageVisible;
2112: int numStages, localNumEvents, numEvents;
2113: int stage, lastStage;
2114: PetscLogEvent event;
2115: PetscErrorCode ierr;
2116: PetscInt i;
2117: MPI_Comm comm;
2120: PetscObjectGetComm((PetscObject)viewer,&comm);
2121: MPI_Comm_size(comm, &size);
2122: MPI_Comm_rank(comm, &rank);
2123: PetscMalloc(size*sizeof(PetscLogDouble), &mydata);
2124: PetscMalloc(size*sizeof(PetscMPIInt), &mycount);
2126: /* Pop off any stages the user forgot to remove */
2127: lastStage = 0;
2128: PetscLogGetStageLog(&stageLog);
2129: PetscStageLogGetCurrent(stageLog, &stage);
2130: while (stage >= 0) {
2131: lastStage = stage;
2132: PetscStageLogPop(stageLog);
2133: PetscStageLogGetCurrent(stageLog, &stage);
2134: }
2135: /* Get the total elapsed time */
2136: PetscTime(locTotalTime); locTotalTime -= petsc_BaseTime;
2138: PetscFPrintf(comm, fd, "\n#------ PETSc Performance Summary ----------\n\n");
2139: PetscFPrintf(comm, fd, "Nproc = %d\n",size);
2141: /* Must preserve reduction count before we go on */
2142: red = (petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct)/((PetscLogDouble) size);
2144: /* Calculate summary information */
2146: /* Time */
2147: MPI_Gather(&locTotalTime,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2148: if (!rank){
2149: PetscFPrintf(comm, fd, "Time = [ " );
2150: tot = 0.0;
2151: for (i=0; i<size; i++){
2152: tot += mydata[i];
2153: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2154: }
2155: PetscFPrintf(comm, fd, "]\n" );
2156: avg = (tot)/((PetscLogDouble) size);
2157: TotalTime = tot;
2158: }
2160: /* Objects */
2161: avg = (PetscLogDouble) petsc_numObjects;
2162: MPI_Gather(&avg,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2163: if (!rank){
2164: PetscFPrintf(comm, fd, "Objects = [ " );
2165: for (i=0; i<size; i++){
2166: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2167: }
2168: PetscFPrintf(comm, fd, "]\n" );
2169: }
2171: /* Flops */
2172: MPI_Gather(&petsc_TotalFlops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2173: if (!rank){
2174: PetscFPrintf(comm, fd, "Flops = [ " );
2175: tot = 0.0;
2176: for (i=0; i<size; i++){
2177: tot += mydata[i];
2178: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2179: }
2180: PetscFPrintf(comm, fd, "]\n");
2181: TotalFlops = tot;
2182: }
2184: /* Memory */
2185: PetscMallocGetMaximumUsage(&mem);
2186: MPI_Gather(&mem,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2187: if (!rank){
2188: PetscFPrintf(comm, fd, "Memory = [ " );
2189: for (i=0; i<size; i++){
2190: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2191: }
2192: PetscFPrintf(comm, fd, "]\n" );
2193: }
2195: /* Messages */
2196: mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
2197: MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2198: if (!rank){
2199: PetscFPrintf(comm, fd, "MPIMessages = [ " );
2200: tot = 0.0;
2201: for (i=0; i<size; i++){
2202: tot += mydata[i];
2203: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2204: }
2205: PetscFPrintf(comm, fd, "]\n" );
2206: numMessages = tot;
2207: }
2209: /* Message Lengths */
2210: mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
2211: MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2212: if (!rank){
2213: PetscFPrintf(comm, fd, "MPIMessageLengths = [ " );
2214: tot = 0.0;
2215: for (i=0; i<size; i++){
2216: tot += mydata[i];
2217: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2218: }
2219: PetscFPrintf(comm, fd, "]\n" );
2220: messageLength = tot;
2221: }
2223: /* Reductions */
2224: MPI_Gather(&red,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);
2225: if (!rank){
2226: PetscFPrintf(comm, fd, "MPIReductions = [ " );
2227: tot = 0.0;
2228: for (i=0; i<size; i++){
2229: tot += mydata[i];
2230: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2231: }
2232: PetscFPrintf(comm, fd, "]\n" );
2233: numReductions = tot;
2234: }
2236: /* Get total number of stages --
2237: Currently, a single processor can register more stages than another, but stages must all be registered in order.
2238: We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID.
2239: This seems best accomplished by assoicating a communicator with each stage.
2240: */
2241: MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);
2242: PetscMalloc(numStages * sizeof(PetscBool), &localStageUsed);
2243: PetscMalloc(numStages * sizeof(PetscBool), &stageUsed);
2244: PetscMalloc(numStages * sizeof(PetscBool), &localStageVisible);
2245: PetscMalloc(numStages * sizeof(PetscBool), &stageVisible);
2246: if (numStages > 0) {
2247: stageInfo = stageLog->stageInfo;
2248: for(stage = 0; stage < numStages; stage++) {
2249: if (stage < stageLog->numStages) {
2250: localStageUsed[stage] = stageInfo[stage].used;
2251: localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
2252: } else {
2253: localStageUsed[stage] = PETSC_FALSE;
2254: localStageVisible[stage] = PETSC_TRUE;
2255: }
2256: }
2257: MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);
2258: MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);
2259: for(stage = 0; stage < numStages; stage++) {
2260: if (stageUsed[stage]) {
2261: PetscFPrintf(comm, fd, "\n#Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");
2262: PetscFPrintf(comm, fd, "# Avg %%Total Avg %%Total counts %%Total Avg %%Total counts %%Total \n");
2263: break;
2264: }
2265: }
2266: for(stage = 0; stage < numStages; stage++) {
2267: if (!stageUsed[stage]) continue;
2268: if (localStageUsed[stage]) {
2269: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2270: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2271: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2272: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2273: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2274: name = stageInfo[stage].name;
2275: } else {
2276: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2277: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2278: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2279: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2280: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2281: name = "";
2282: }
2283: mess *= 0.5; messLen *= 0.5; red /= size;
2284: if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0;
2285: if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0;
2286: /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
2287: if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0;
2288: if (numMessages != 0.0) avgMessLen = messLen/numMessages; else avgMessLen = 0.0;
2289: if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0;
2290: if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0;
2291: PetscFPrintf(comm, fd, "# ");
2292: PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
2293: stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
2294: mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);
2295: }
2296: }
2298: /* Report events */
2299: PetscFPrintf(comm,fd,"\n# Event\n");
2300: PetscFPrintf(comm,fd,"# ------------------------------------------------------\n");
2301: PetscFPrintf(comm,fd,"class Stage(object):\n");
2302: PetscFPrintf(comm,fd," def __init__(self, name, time, flops, numMessages, messageLength, numReductions):\n");
2303: PetscFPrintf(comm,fd," # The time and flops represent totals across processes, whereas reductions are only counted once\n");
2304: PetscFPrintf(comm,fd," self.name = name\n");
2305: PetscFPrintf(comm,fd," self.time = time\n");
2306: PetscFPrintf(comm,fd," self.flops = flops\n");
2307: PetscFPrintf(comm,fd," self.numMessages = numMessages\n");
2308: PetscFPrintf(comm,fd," self.messageLength = messageLength\n");
2309: PetscFPrintf(comm,fd," self.numReductions = numReductions\n");
2310: PetscFPrintf(comm,fd," self.event = {}\n");
2311: PetscFPrintf(comm,fd, "class Dummy(object):\n");
2312: PetscFPrintf(comm,fd, " pass\n");
2313: /* Problem: The stage name will not show up unless the stage executed on proc 1 */
2314: for(stage = 0; stage < numStages; stage++) {
2315: if (!stageVisible[stage]) continue;
2316: if (localStageUsed[stage]) {
2317: MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2318: MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2319: MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2320: MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2321: MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2322: } else {
2323: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2324: MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2325: MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2326: MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2327: MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2328: MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);
2329: }
2330: mess *= 0.5; messLen *= 0.5; red /= size;
2332: /* Get total number of events in this stage --
2333: Currently, a single processor can register more events than another, but events must all be registered in order,
2334: just like stages. We can removed this requirement if necessary by having a global event numbering and indirection
2335: on the event ID. This seems best accomplished by assoicating a communicator with each stage.
2337: Problem: If the event did not happen on proc 1, its name will not be available.
2338: Problem: Event visibility is not implemented
2339: */
2341: {
2342: size_t len, c;
2344: PetscStrcpy(stageName, stageInfo[stage].name);
2345: PetscStrlen(stageName, &len);
2346: for(c = 0; c < len; ++c) {
2347: if (stageName[c] == ' ') stageName[c] = '_';
2348: }
2349: }
2350: if (!rank){
2351: PetscFPrintf(comm, fd, "%s = Stage('%s', %g, %g, %g, %g, %g)\n", stageName, stageName, stageTime, flops, mess, messLen, red);
2352: }
2354: if (localStageUsed[stage]) {
2355: eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo;
2356: localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
2357: } else {
2358: localNumEvents = 0;
2359: }
2360: MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);
2361: for(event = 0; event < numEvents; event++) {
2362: PetscBool hasEvent = PETSC_TRUE;
2363: PetscMPIInt tmpI;
2364: PetscLogDouble tmpR;
2366: if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
2367: size_t len, c;
2369: MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2370: PetscStrcpy(eventName, stageLog->eventLog->eventInfo[event].name);
2371: PetscStrlen(eventName, &len);
2372: for(c = 0; c < len; ++c) {
2373: if (eventName[c] == ' ') eventName[c] = '_';
2374: }
2375: } else {
2376: MPI_Allreduce(&ierr, &maxCt, 1, MPI_INT, MPI_MAX, comm);
2377: eventName[0] = 0;
2378: hasEvent = PETSC_FALSE;
2379: }
2381: if (maxCt != 0) {
2382: PetscFPrintf(comm, fd,"#\n");
2383: if (!rank){
2384: PetscFPrintf(comm, fd, "%s = Dummy()\n",eventName);
2385: PetscFPrintf(comm, fd, "%s.event['%s'] = %s\n",stageName,eventName,eventName);
2386: }
2387: /* Count */
2388: if (hasEvent) {tmpI = eventInfo[event].count;}
2389: else {tmpI = 0;}
2390: MPI_Gather(&tmpI,1, MPI_INT, mycount, 1, MPI_INT, 0, comm);
2391: PetscFPrintf(comm, fd, "%s.Count = [ ", eventName);
2392: if (!rank){
2393: for (i=0; i<size; i++){
2394: PetscFPrintf(comm, fd, " %7d,",mycount[i] );
2395: }
2396: PetscFPrintf(comm, fd, "]\n" );
2397: }
2398: /* Time */
2399: if (hasEvent) {tmpR = eventInfo[event].time;}
2400: else {tmpR = 0.0;}
2401: MPI_Gather(&tmpR, 1, MPIU_PETSCLOGDOUBLE, mydata, 1, MPIU_PETSCLOGDOUBLE, 0, comm);
2402: if (!rank){
2403: PetscFPrintf(comm, fd, "%s.Time = [ ", eventName);
2404: for (i=0; i<size; i++){
2405: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2406: }
2407: PetscFPrintf(comm, fd, "]\n" );
2408: }
2409: /* Flops */
2410: if (hasEvent) {tmpR = eventInfo[event].flops;}
2411: else {tmpR = 0.0;}
2412: MPI_Gather(&tmpR, 1, MPIU_PETSCLOGDOUBLE, mydata, 1, MPIU_PETSCLOGDOUBLE, 0, comm);
2413: if (!rank){
2414: PetscFPrintf(comm, fd, "%s.Flops = [ ", eventName);
2415: for (i=0; i<size; i++){
2416: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2417: }
2418: PetscFPrintf(comm, fd, "]\n" );
2419: }
2420: /* Num Messages */
2421: if (hasEvent) {tmpR = eventInfo[event].numMessages;}
2422: else {tmpR = 0.0;}
2423: MPI_Gather(&tmpR, 1, MPIU_PETSCLOGDOUBLE, mydata, 1, MPIU_PETSCLOGDOUBLE, 0, comm);
2424: PetscFPrintf(comm, fd, "%s.NumMessages = [ ", eventName);
2425: if (!rank){
2426: for (i=0; i<size; i++){
2427: PetscFPrintf(comm, fd, " %7.1e,",mydata[i] );
2428: }
2429: PetscFPrintf(comm, fd, "]\n" );
2430: }
2431: /* Message Length */
2432: if (hasEvent) {tmpR = eventInfo[event].messageLength;}
2433: else {tmpR = 0.0;}
2434: MPI_Gather(&tmpR, 1, MPIU_PETSCLOGDOUBLE, mydata, 1, MPIU_PETSCLOGDOUBLE, 0, comm);
2435: if (!rank){
2436: PetscFPrintf(comm, fd, "%s.MessageLength = [ ", eventName);
2437: for (i=0; i<size; i++){
2438: PetscFPrintf(comm, fd, " %5.3e,",mydata[i] );
2439: }
2440: PetscFPrintf(comm, fd, "]\n" );
2441: }
2442: /* Num Reductions */
2443: if (hasEvent) {tmpR = eventInfo[event].numReductions;}
2444: else {tmpR = 0.0;}
2445: MPI_Gather(&tmpR, 1, MPIU_PETSCLOGDOUBLE, mydata, 1, MPIU_PETSCLOGDOUBLE, 0, comm);
2446: PetscFPrintf(comm, fd, "%s.NumReductions = [ ", eventName);
2447: if (!rank){
2448: for (i=0; i<size; i++){
2449: PetscFPrintf(comm, fd, " %7.1e,",mydata[i] );
2450: }
2451: PetscFPrintf(comm, fd, "]\n" );
2452: }
2453: }
2454: }
2455: }
2457: /* Right now, only stages on the first processor are reported here, meaning only objects associated with
2458: the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
2459: stats for stages local to processor sets.
2460: */
2461: for(stage = 0; stage < numStages; stage++) {
2462: if (!localStageUsed[stage]) {
2463: PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);
2464: }
2465: }
2467: PetscFree(localStageUsed);
2468: PetscFree(stageUsed);
2469: PetscFree(localStageVisible);
2470: PetscFree(stageVisible);
2471: PetscFree(mydata);
2472: PetscFree(mycount);
2474: /* Information unrelated to this particular run */
2475: PetscFPrintf(comm, fd,
2476: "# ========================================================================================================================\n");
2477: PetscTime(y);
2478: PetscTime(x);
2479: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2480: PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
2481: PetscFPrintf(comm,fd,"AveragetimetogetPetscTime = %g\n", (y-x)/10.0);
2482: /* MPI information */
2483: if (size > 1) {
2484: MPI_Status status;
2485: PetscMPIInt tag;
2486: MPI_Comm newcomm;
2488: MPI_Barrier(comm);
2489: PetscTime(x);
2490: MPI_Barrier(comm);
2491: MPI_Barrier(comm);
2492: MPI_Barrier(comm);
2493: MPI_Barrier(comm);
2494: MPI_Barrier(comm);
2495: PetscTime(y);
2496: PetscFPrintf(comm, fd, "AveragetimeforMPI_Barrier = %g\n", (y-x)/5.0);
2497: PetscCommDuplicate(comm,&newcomm, &tag);
2498: MPI_Barrier(comm);
2499: if (rank) {
2500: MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);
2501: MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);
2502: } else {
2503: PetscTime(x);
2504: MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);
2505: MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);
2506: PetscTime(y);
2507: PetscFPrintf(comm,fd,"AveragetimforzerosizeMPI_Send = %g\n", (y-x)/size);
2508: }
2509: PetscCommDestroy(&newcomm);
2510: }
2512: /* Cleanup */
2513: PetscFPrintf(comm, fd, "\n");
2514: PetscStageLogPush(stageLog, lastStage);
2515: return(0);
2516: }
2518: #else /* end of -DPETSC_USE_LOG section */
2522: PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...)
2523: {
2525: return(0);
2526: }
2528: #endif /* PETSC_USE_LOG*/
2531: PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID;
2532: PetscClassId PETSC_OBJECT_CLASSID = 0;
2536: /*@C
2537: PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code.
2539: Not Collective
2541: Input Parameter:
2542: . name - The class name
2543:
2544: Output Parameter:
2545: . oclass - The class id or classid
2547: Level: developer
2549: .keywords: log, class, register
2551: @*/
2552: PetscErrorCode PetscClassIdRegister(const char name[],PetscClassId *oclass )
2553: {
2554: #if defined(PETSC_USE_LOG)
2555: PetscStageLog stageLog;
2556: PetscInt stage;
2558: #endif
2561: *oclass = ++PETSC_LARGEST_CLASSID;
2562: #if defined(PETSC_USE_LOG)
2563: PetscLogGetStageLog(&stageLog);
2564: PetscClassRegLogRegister(stageLog->classLog, name, *oclass);
2565: for(stage = 0; stage < stageLog->numStages; stage++) {
2566: ClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses);
2567: }
2568: #endif
2569: return(0);
2570: }