Actual source code: petsclog.h

  1: /*
  2:     Defines profile/logging in PETSc.
  3: */
  4: #ifndef PETSCLOG_H
  5: #define PETSCLOG_H

  7: #include <petscsys.h>
  8: #include <petsctime.h>

 10: /* SUBMANSEC = Sys */

 12: /* General logging of information; different from event logging */
 13: PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4);
 14: #if defined(PETSC_USE_INFO)
 15:   #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__)
 16: #else
 17:   #define PetscInfo(A, ...) 0
 18: #endif

 20: #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 21: #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 22: #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 23: #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 24: #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 25: #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 26: #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 27: #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
 28: #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)

 30: /*E
 31:   PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size

 33:   Used as an input for `PetscInfoSetFilterCommSelf()`

 35: + `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on
 36: communicator size (i.e. will print for all communicators)
 37: . `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
 38: - `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1

 40:   Level: intermediate

 42: .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
 43: E*/
 44: typedef enum {
 45:   PETSC_INFO_COMM_ALL       = -1,
 46:   PETSC_INFO_COMM_NO_SELF   = 0,
 47:   PETSC_INFO_COMM_ONLY_SELF = 1
 48: } PetscInfoCommFlag;

 50: PETSC_EXTERN const char *const PetscInfoCommFlags[];
 51: PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
 52: PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
 53: PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
 54: PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
 55: PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
 56: PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
 57: PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
 58: PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
 59: PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
 60: PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
 61: PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
 62: PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
 63: PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
 64: PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */

 66: /*MC
    PetscLogEvent - id used to identify PETSc or user events which time portions (blocks of executable
     code).

 70:     Level: intermediate

 72: .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
 73: M*/
 74: typedef int PetscLogEvent;

 76: /*MC
 77:     PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging

 79:     Level: intermediate

 81: .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
 82: M*/
 83: typedef int PetscLogStage;

 85: #define PETSC_EVENT 1311311
 86: PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;

 88: /* Global flop counter */
 89: PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
 90: PETSC_EXTERN PetscLogDouble petsc_tmp_flops;

 92: /* We must make the following structures available to access the event
 93:      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
 94:      API and are not intended to be used by other parts of PETSc or by users.

 96:      The code that manipulates these structures is in src/sys/logging/utils.
 97: */
 98: typedef struct _n_PetscIntStack *PetscIntStack;

100: /* -----------------------------------------------------------------------------------------------------*/
101: /*
102:     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
103:        static information about it, the second collects statistics on how many objects of the class are created,
104:        how much memory they use, etc.

106:     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
107: */
108: typedef struct {
109:   char        *name;    /* The class name */
110:   PetscClassId classid; /* The integer identifying this class */
111: } PetscClassRegInfo;

113: typedef struct {
114:   PetscClassId   id;           /* The integer identifying this class */
115:   int            creations;    /* The number of objects of this class created */
116:   int            destructions; /* The number of objects of this class destroyed */
117:   PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
118:   PetscLogDouble descMem;      /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */
119: } PetscClassPerfInfo;

121: typedef struct _n_PetscClassRegLog *PetscClassRegLog;
122: struct _n_PetscClassRegLog {
123:   int                numClasses; /* The number of classes registered */
124:   int                maxClasses; /* The maximum number of classes */
125:   PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonicly increasing) */
126: };

128: typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
129: struct _n_PetscClassPerfLog {
130:   int                 numClasses; /* The number of logging classes */
131:   int                 maxClasses; /* The maximum number of classes */
132:   PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonicly increasing) */
133: };
134: /* -----------------------------------------------------------------------------------------------------*/
135: /*
136:     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
137:        static information about it, the second collects statistics on how many times the event is used, how
138:        much time it takes, etc.

140:     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
141:       of these for each stage.

143: */
144: typedef struct {
145:   char        *name;       /* The name of this event */
146:   PetscClassId classid;    /* The class the event is associated with */
147:   PetscBool    collective; /* Flag this event as collective */
148: #if defined(PETSC_HAVE_MPE)
149:   int mpe_id_begin; /* MPE IDs that define the event */
150:   int mpe_id_end;
151: #endif
152: } PetscEventRegInfo;

154: typedef struct {
155:   int            id;                      /* The integer identifying this event */
156:   PetscBool      active;                  /* The flag to activate logging */
157:   PetscBool      visible;                 /* The flag to print info in summary */
158:   int            depth;                   /* The nesting depth of the event call */
159:   int            count;                   /* The number of times this event was executed */
160:   PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */
161:   PetscLogDouble time, time2, timeTmp;    /* The time and time^2 taken for this event */
162:   PetscLogDouble syncTime;                /* The synchronization barrier time */
163:   PetscLogDouble dof[8];                  /* The number of degrees of freedom associated with this event */
164:   PetscLogDouble errors[8];               /* The errors (user-defined) associated with this event */
165:   PetscLogDouble numMessages;             /* The number of messages in this event */
166:   PetscLogDouble messageLength;           /* The total message lengths in this event */
167:   PetscLogDouble numReductions;           /* The number of reductions in this event */
168:   PetscLogDouble memIncrease;             /* How much the resident memory has increased in this event */
169:   PetscLogDouble mallocIncrease;          /* How much the maximum malloced space has increased in this event */
170:   PetscLogDouble mallocSpace;             /* How much the space was malloced and kept during this event */
171:   PetscLogDouble mallocIncreaseEvent;     /* Maximum of the high water mark with in event minus memory available at the end of the event */
172: #if defined(PETSC_HAVE_DEVICE)
173:   PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
174:   PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
175:   PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
176:   PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
177:   PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
178:   PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
179: #endif
180: } PetscEventPerfInfo;

182: typedef struct _n_PetscEventRegLog *PetscEventRegLog;
183: struct _n_PetscEventRegLog {
184:   int                numEvents; /* The number of registered events */
185:   int                maxEvents; /* The maximum number of events */
186:   PetscEventRegInfo *eventInfo; /* The registration information for each event */
187: };

189: typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
190: struct _n_PetscEventPerfLog {
191:   int                 numEvents; /* The number of logging events */
192:   int                 maxEvents; /* The maximum number of events */
193:   PetscEventPerfInfo *eventInfo; /* The performance information for each event */
194: };
195: /* ------------------------------------------------------------------------------------------------------------*/
196: /*
197:    PetscStageInfo - Contains all the information about a particular stage.

199:    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
200: */
201: typedef struct _PetscStageInfo {
202:   char              *name;     /* The stage name */
203:   PetscBool          used;     /* The stage was pushed on this processor */
204:   PetscEventPerfInfo perfInfo; /* The stage performance information */
205:   PetscEventPerfLog  eventLog; /* The event information for this stage */
206:   PetscClassPerfLog  classLog; /* The class information for this stage */
207: } PetscStageInfo;

209: typedef struct _n_PetscStageLog *PetscStageLog;
210: struct _n_PetscStageLog {
211:   int              numStages; /* The number of registered stages */
212:   int              maxStages; /* The maximum number of stages */
213:   PetscIntStack    stack;     /* The stack for active stages */
214:   int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
215:   PetscStageInfo  *stageInfo; /* The information for each stage */
216:   PetscEventRegLog eventLog;  /* The registered events */
217:   PetscClassRegLog classLog;  /* The registered classes */
218: };
219: /* -----------------------------------------------------------------------------------------------------*/

221: PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p)
222: {
223:   (void)o;
224:   (void)p;
225:   return 0;
226: }

228: PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m)
229: {
230:   (void)o;
231:   (void)m;
232:   return 0;
233: }

235: #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
236: PETSC_EXTERN PetscStageLog  petsc_stageLog;
237: PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
238: PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
239: PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);

241:   /*
242:    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.

244:    For the complex numbers version, note that
245:        1 complex addition = 2 flops
246:        1 complex multiplication = 6 flops,
247:    where we define 1 flop as that for a double precision scalar.  We roughly approximate
248:    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
249:    to the assumption that we're counting mostly additions and multiplications -- and
250:    roughly the same number of each.  More accurate counting could be done by distinguishing
251:    among the various arithmetic operations.
252:  */

254:   #if defined(PETSC_USE_COMPLEX)
255:     #define PETSC_FLOPS_PER_OP 4.0
256:   #else
257:     #define PETSC_FLOPS_PER_OP 1.0
258:   #endif

260: /*@C
261:        PetscLogFlops - Log how many flops are performed in a calculation

263:    Input Parameter:
264: .   flops - the number of flops

266:    Level: intermediate

268:    Note:
269:      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
270:      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)

272: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`
273: @*/

275: static inline PetscErrorCode PetscLogFlops(PetscLogDouble n)
276: {
277:   #if defined(PETSC_USE_DEBUG)
279:   #endif
280:   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
281:   return 0;
282: }

284: PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);

286:   #if defined(PETSC_HAVE_MPE)
287: PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
288: PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
289:   #endif

291: PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
292: PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
293: PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
294: PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);

296:   #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]);)
297:   #define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
298:   #define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
299: PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);

301: /* Initialization functions */
302: PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
303: PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
304: PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
305: PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
306: PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
307: PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
308: PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
309: PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));

311: /* Output functions */
312: PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
313: PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
314: PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);

316: /* Status checking functions */
317: PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);

319: /* Stage functions */
320: PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
321: PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
322: PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
323: PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
324: PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
325: PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
326: PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
327: PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);

329: /* Event functions */
330: PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
331: PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
332: PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
333: PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
334: PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
335: PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
336: PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
337: PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
338: PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
339: PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
340: PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
341: PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
342: PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
343: PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
344: PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
345: PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
346: PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);

348: /* Global counters */
349: PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
350: PETSC_EXTERN PetscLogDouble petsc_isend_ct;
351: PETSC_EXTERN PetscLogDouble petsc_recv_ct;
352: PETSC_EXTERN PetscLogDouble petsc_send_ct;
353: PETSC_EXTERN PetscLogDouble petsc_irecv_len;
354: PETSC_EXTERN PetscLogDouble petsc_isend_len;
355: PETSC_EXTERN PetscLogDouble petsc_recv_len;
356: PETSC_EXTERN PetscLogDouble petsc_send_len;
357: PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
358: PETSC_EXTERN PetscLogDouble petsc_gather_ct;
359: PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
360: PETSC_EXTERN PetscLogDouble petsc_wait_ct;
361: PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
362: PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
363: PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;

365: PETSC_EXTERN PetscBool PetscLogMemory;

367: PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
368: PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);

370:   #define PetscLogEventSync(e, comm) \
371:     (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0))

373:   #define PetscLogEventBegin(e, o1, o2, o3, o4) \
374:     ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0)

376:   #define PetscLogEventEnd(e, o1, o2, o3, o4) \
377:     ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0)

379: PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
380: PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);

382:   /*
383:      These are used internally in the PETSc routines to keep a count of MPI messages and
384:    their sizes.

     This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
   uses macros to define the MPI operations.

     It does not work correctly on HP-UX because it processes the
   macros in a way that sometimes double counts, hence
   PETSC_HAVE_BROKEN_RECURSIVE_MACRO

393:      It does not work with Windows because winmpich lacks MPI_Type_size()
394: */
395:   #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
396: /*
397:    Logging of MPI activities
398: */
399: static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length)
400: {
401:   PetscMPIInt typesize;

403:   if (type == MPI_DATATYPE_NULL) return 0;
404:   MPI_Type_size(type, &typesize);
405:   *length += (PetscLogDouble)(count * typesize);
406:   return 0;
407: }

409: static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
410: {
411:   PetscMPIInt typesize, size, p;

413:   if (type == MPI_DATATYPE_NULL) return 0;
414:   MPI_Comm_size(comm, &size);
415:   MPI_Type_size(type, &typesize);
416:   for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
417:   return 0;
418: }

420: static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
421: {
422:   PetscMPIInt typesize, p;

424:   if (type == MPI_DATATYPE_NULL) return 0;
425:   MPI_Type_size(type, &typesize);
426:   for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
427:   return 0;
428: }

430: /*
431:     Returns 1 if the communicator is parallel else zero
432: */
433: static inline int PetscMPIParallelComm(MPI_Comm comm)
434: {
435:   PetscMPIInt size;
436:   MPI_Comm_size(comm, &size);
437:   return size > 1;
438: }

440:     #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request)))

442:     #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request)))

444:     #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request)))

446:     #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request)))

448:     #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests))))

450:     #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests))))

452:     #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests)))

454:     #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status)))

456:     #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status)))

458:     #define MPI_Send(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm)))

460:     #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm)))

462:     #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status)))

464:     #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d)))

466:     #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses)))

468:     #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm)))

470:     #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm)))

472:     #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm)))

474:     #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
475:       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

477:     #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \
478:       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm)))

480:     #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

482:     #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \
483:       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm)))

485:     #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
486:       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

488:     #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \
489:       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm)))

491:     #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
492:       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

494:     #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \
495:       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

497:     #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
498:       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

500:     #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \
501:       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request)))

503:     #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
504:       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

506:     #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \
507:       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request)))

509:     #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
510:       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

512:     #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \
513:       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request)))

515:     #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
516:       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

518:     #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
519:       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

521:   #else

/* Non-logging versions of the persistent-request start helpers, used in the #else branch of the
   conditional closed below (per its closing comment: MPIUNI or a compiler with broken recursive macros).
   count and datatype are accepted but ignored. The Startall variants call MPI_Startall() only when
   number is nonzero and, because of "&&", evaluate to 0 or 1 rather than to the raw MPI return code.
   MPI_Start_isend() forwards directly to MPI_Start(). */
523:     #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))

525:     #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))

527:     #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests)))

529:   #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */

531: #else /* ---Logging is turned off --------------------------------------------*/

/* Logging disabled: PetscLogMemory is the constant PETSC_FALSE, PetscLogFlops() expands to 0 without
   evaluating its argument, and PetscGetFlops() stores 0.0 through its pointer argument and yields 0. */
533:   #define PetscLogMemory PETSC_FALSE

535:   #define PetscLogFlops(n) 0
536:   #define PetscGetFlops(a) (*(a) = 0.0, 0)

/* Logging disabled: stage routines expand to 0 and do not evaluate their arguments, so output
   arguments are left untouched (PetscLogStageRegister() does not write the stage id through b).
   The one exception is PetscLogStageGetId(), which stores 0 through b before yielding 0. */
538:   #define PetscLogStageRegister(a, b)   0
539:   #define PetscLogStagePush(a)          0
540:   #define PetscLogStagePop()            0
541:   #define PetscLogStageSetActive(a, b)  0
542:   #define PetscLogStageGetActive(a, b)  0
543:   #define PetscLogStageGetVisible(a, b) 0
544:   #define PetscLogStageSetVisible(a, b) 0
545:   #define PetscLogStageGetId(a, b)      (*(b) = 0, 0)

/* Logging disabled: event routines expand to 0 and do not evaluate their arguments, so output
   arguments are left untouched (PetscLogEventRegister() and PetscLogEventGetPerfInfo() write nothing).
   The one exception is PetscLogEventGetId(), which stores 0 through b before yielding 0. */
547:   #define PetscLogEventRegister(a, b, c)    0
548:   #define PetscLogEventSetCollective(a, b)  0
549:   #define PetscLogEventIncludeClass(a)      0
550:   #define PetscLogEventExcludeClass(a)      0
551:   #define PetscLogEventActivate(a)          0
552:   #define PetscLogEventDeactivate(a)        0
553:   #define PetscLogEventDeactivatePush(a)    0
554:   #define PetscLogEventDeactivatePop(a)     0
555:   #define PetscLogEventActivateClass(a)     0
556:   #define PetscLogEventDeactivateClass(a)   0
557:   #define PetscLogEventSetActiveAll(a, b)   0
558:   #define PetscLogEventGetId(a, b)          (*(b) = 0, 0)
559:   #define PetscLogEventGetPerfInfo(a, b, c) 0
560:   #define PetscLogEventSetDof(a, b, c)      0
561:   #define PetscLogEventSetError(a, b, c)    0

/* Logging disabled: these four names expand to the constant 0.
   NOTE(review): with logging enabled they are presumably the begin/end/create/destroy logging hooks --
   confirm against the PETSC_USE_LOG branch of this header. */
563:   #define PetscLogPLB 0
564:   #define PetscLogPLE 0
565:   #define PetscLogPHC 0
566:   #define PetscLogPHD 0

/* Logging disabled: object-tracking routines expand to 0 and do not evaluate their arguments.
   PetscLogObjectState() is not stubbed out; it remains declared as an external variadic function
   (PETSC_ATTRIBUTE_FORMAT(2, 3) presumably requests printf-style checking of argument 2 -- confirm). */
568:   #define PetscLogObjectParents(p, n, c) 0
569:   #define PetscLogObjectCreate(h)        0
570:   #define PetscLogObjectDestroy(h)       0
571: PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);

/* Logging disabled: the routines that start or configure logging expand to 0 and do not evaluate
   their arguments. PetscLogIsActive() additionally stores PETSC_FALSE through flag before yielding 0. */
573:   #define PetscLogDefaultBegin()     0
574:   #define PetscLogAllBegin()         0
575:   #define PetscLogNestedBegin()      0
576:   #define PetscLogTraceBegin(file)   0
577:   #define PetscLogActions(a)         0
578:   #define PetscLogObjects(a)         0
579:   #define PetscLogSetThreshold(a, b) 0
580:   #define PetscLogSet(lb, le)        0
581:   #define PetscLogIsActive(flag)     (*(flag) = PETSC_FALSE, 0)

/* Logging disabled: the reporting routines expand to 0 and do not evaluate their arguments. */
583:   #define PetscLogView(viewer)      0
584:   #define PetscLogViewFromOptions() 0
585:   #define PetscLogDump(c)           0

/* Logging disabled: event synchronization and begin/end markers expand to 0; neither the event nor
   the object arguments are evaluated. */
587:   #define PetscLogEventSync(e, comm)                            0
588:   #define PetscLogEventBegin(e, o1, o2, o3, o4)                 0
589:   #define PetscLogEventEnd(e, o1, o2, o3, o4)                   0

591:   /* These helpers must exist even when PETSC_USE_LOG is NOT defined. count and datatype are ignored;
     the Startall variants call MPI_Startall() only when number is nonzero and, because of "&&",
     evaluate to 0 or 1; MPI_Start_isend() forwards directly to MPI_Start(). */
592:   #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests))
593:   #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests))
594:   #define MPI_Start_isend(count, datatype, requests)            MPI_Start(requests)

596: #endif /* PETSC_USE_LOG */

598: #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE)

600: /* Global GPU counters; declared here and defined elsewhere. They are updated by the inline helpers below. */
601: PETSC_EXTERN PetscLogDouble petsc_ctog_ct; /* incremented by one per PetscLogCpuToGpu() call */
602: PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; /* incremented by one per PetscLogGpuToCpu() call */
603: PETSC_EXTERN PetscLogDouble petsc_ctog_sz; /* sum of the sizes passed to PetscLogCpuToGpu() */
604: PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; /* sum of the sizes passed to PetscLogGpuToCpu() */
605: PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; /* incremented by one per PetscLogCpuToGpuScalar() call */
606: PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; /* incremented by one per PetscLogGpuToCpuScalar() call */
607: PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; /* sum of the sizes passed to PetscLogCpuToGpuScalar() */
608: PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; /* sum of the sizes passed to PetscLogGpuToCpuScalar() */
609: PETSC_EXTERN PetscLogDouble petsc_gflops; /* accumulated by PetscLogGpuFlops() */
610: PETSC_EXTERN PetscLogDouble petsc_gtime; /* accumulated by PetscLogGpuTimeAdd() */

612: static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
613: {
614:   petsc_ctog_ct += 1;
615:   petsc_ctog_sz += size;
616:   return 0;
617: }

619: static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
620: {
621:   petsc_gtoc_ct += 1;
622:   petsc_gtoc_sz += size;
623:   return 0;
624: }

626: static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size)
627: {
628:   petsc_ctog_ct_scalar += 1;
629:   petsc_ctog_sz_scalar += size;
630:   return 0;
631: }

633: static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size)
634: {
635:   petsc_gtoc_ct_scalar += 1;
636:   petsc_gtoc_sz_scalar += size;
637:   return 0;
638: }

640: /*@C
641:        PetscLogGpuFlops - Log how many flops are performed in a calculation on the device

643:    Input Parameter:
644: .   flops - the number of flops

646:    Level: intermediate

648:    Notes:
649:      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
650:      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)

652:      The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops
653:      just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()`

655: .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()`
656: @*/
657: static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
658: {
660:   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
661:   petsc_gflops += PETSC_FLOPS_PER_OP * n;
662:   return 0;
663: }

665: static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
666: {
667:   petsc_gtime += t;
668:   return 0;
669: }

/* GPU timing routines; only declared here, implemented elsewhere.
   NOTE(review): PetscLogGpuTimeBegin()/PetscLogGpuTimeEnd() presumably bracket a timed device region
   and PetscLogGpuTime() presumably enables that timing -- confirm against the implementation. */
671: PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void);
672: PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void);
673: PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void);

675: #else

/* Used unless both PETSC_USE_LOG and PETSC_HAVE_DEVICE are defined: every GPU logging routine
   expands to 0 and does not evaluate its argument. */
677:   #define PetscLogCpuToGpu(a)       0
678:   #define PetscLogGpuToCpu(a)       0
679:   #define PetscLogCpuToGpuScalar(a) 0
680:   #define PetscLogGpuToCpuScalar(a) 0
681:   #define PetscLogGpuFlops(a)       0
682:   #define PetscLogGpuTime()         0
683:   #define PetscLogGpuTimeAdd(a)     0
684:   #define PetscLogGpuTimeBegin()    0
685:   #define PetscLogGpuTimeEnd()      0

687: #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */

/*
  PetscPreLoadBegin - Opens a preloading region. The expansion opens a "do {" block and a "for" loop
  body and leaves both unclosed; PetscPreLoadEnd() closes them, so the two macros must be used as a
  pair in the same scope.

  flag - initial value of the local PetscPreLoading; the -preload option, if given, overrides it
  name - name of the log stage that is registered and pushed for the region

  The locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt and _stageNum are declared here and are
  visible to the enclosed code. PetscPreLoadMax is (int)PetscPreLoading, so the loop body runs
  PetscPreLoadMax + 1 times (twice with preloading on, assuming PETSC_TRUE converts to 1; once
  otherwise). The stage is registered on the first pass and looked up by name on later passes. It is
  set active when preloading is off or when this is not the first pass, i.e. the first of two passes
  runs with the stage inactive. The global PetscPreLoadingUsed is set to PETSC_TRUE once preloading
  has been requested, and PetscPreLoadingOn mirrors PetscPreLoading at the top of every pass.
  Return values of the PETSc calls made here are not checked.
*/
689: #define PetscPreLoadBegin(flag, name) \
690:   do { \
691:     PetscBool     PetscPreLoading = flag; \
692:     int           PetscPreLoadMax, PetscPreLoadIt; \
693:     PetscLogStage _stageNum; \
694:     PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL); \
695:     PetscPreLoadMax     = (int)(PetscPreLoading); \
696:     PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \
697:     for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \
698:       PetscPreLoadingOn = PetscPreLoading; \
699:       PetscBarrier(NULL); \
700:       if (PetscPreLoadIt > 0) PetscLogStageGetId(name, &_stageNum); \
701:       else PetscLogStageRegister(name, &_stageNum); \
702:       PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt)); \
703:       PetscLogStagePush(_stageNum);

/*
  PetscPreLoadEnd - Closes the region opened by PetscPreLoadBegin(): pops the current log stage,
  clears the local PetscPreLoading (so PetscPreLoadingOn becomes PETSC_FALSE at the top of the next
  pass), then closes the "for" body and the "do { ... } while (0)" block. The terminating semicolon
  is supplied by the caller.
*/
705: #define PetscPreLoadEnd() \
706:   PetscLogStagePop(); \
707:   PetscPreLoading = PETSC_FALSE; \
708:   } \
709:   } \
710:   while (0)

/*
  PetscPreLoadStage - Switches to a new log stage inside a PetscPreLoadBegin()/PetscPreLoadEnd()
  region. It uses the locals PetscPreLoadIt, PetscPreLoadMax and _stageNum declared by
  PetscPreLoadBegin(), so it is only valid inside such a region.

  name - name of the stage to switch to

  Pops the current stage, registers `name` on the first pass (or looks it up by name on later passes),
  applies the same activity rule as PetscPreLoadBegin() (active when preloading is off or when this is
  not the first pass) and pushes the stage. Return values of the PETSc calls are not checked.
*/
712: #define PetscPreLoadStage(name) \
713:   do { \
714:     PetscLogStagePop(); \
715:     if (PetscPreLoadIt > 0) PetscLogStageGetId(name, &_stageNum); \
716:     else PetscLogStageRegister(name, &_stageNum); \
717:     PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt)); \
718:     PetscLogStagePush(_stageNum); \
719:   } while (0)

721: /* Global preloading state, written by PetscPreLoadBegin(); declared here and defined elsewhere */
722: PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */
723: PETSC_EXTERN PetscBool PetscPreLoadingOn;   /* true if we are currently in a preloading calculation */

725: #endif