/* sfwindow.c -- PetscSF implementation using MPI one-sided (RMA) windows */
1: #include <petsc/private/sfimpl.h>
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink *PetscSFWinLink;

/* Implementation-specific data attached to a PetscSF of type "window" (sf->data) */
typedef struct {
PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */
PetscSFWinLink wins; /* List of active windows */
PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
PetscSF dynsf; /* Helper SF (one root per remote rank) used to broadcast window base addresses for the DYNAMIC flavor */
MPI_Info info; /* MPI_Info handle passed to the window creation calls; MPI_INFO_NULL unless set via PetscSFWindowSetInfo() */
} PetscSF_Window;
/* One cache entry of composite MPI datatypes built for a given unit type
   (see PetscSFWindowGetDataTypes); entries form a singly linked list. */
struct _n_PetscSFDataLink {
  MPI_Datatype unit;     /* dup of the unit type this entry was built for (cache key) */
  MPI_Datatype *mine;    /* per-root-rank indexed-block types over the local leaf buffer */
  MPI_Datatype *remote;  /* per-root-rank indexed-block types over the remote root buffer */
  PetscSFDataLink next;  /* next cache entry */
};
/* One entry in the list of windows managed by this SF; created by
   PetscSFGetWindow() and released/recycled by PetscSFRestoreWindow(). */
struct _n_PetscSFWinLink {
  PetscBool inuse;                 /* TRUE between Get and Restore of this window */
  size_t bytes;                    /* extent of the unit datatype in bytes */
  void *addr;                      /* window base: user array, or MPI-allocated memory for ALLOCATE/SHARED flavors */
  void *paddr;                     /* user array currently associated with the window (NULL when not in use) */
  MPI_Win win;                     /* the MPI window itself */
  MPI_Request *reqs;               /* per-rank requests, allocated only for PETSCSF_WINDOW_SYNC_LOCK */
  PetscSFWindowFlavorType flavor;  /* flavor this window was created with (reuse requires a match) */
  MPI_Aint *dyn_target_addr;       /* per-rank remote base addresses, DYNAMIC flavor only */
  PetscBool epoch;                 /* TRUE if an access/exposure epoch is currently open */
  PetscSFWinLink next;             /* next window in the list */
};
/* String tables for the enums, in the layout PetscOptionsEnum() expects:
   value names, then the enum type name, the option prefix, and a NULL sentinel. */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
38: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40: {
41: if (*op == MPIU_SUM) *op = MPI_SUM;
42: else if (*op == MPIU_MAX) *op = MPI_MAX;
43: else if (*op == MPIU_MIN) *op = MPI_MIN;
44: return 0;
45: }
47: /*@C
48: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
50: Not Collective
52: Input Parameters:
53: + sf - star forest
54: - unit - data type for each node
56: Output Parameters:
57: + localtypes - types describing part of local leaf buffer referencing each remote rank
58: - remotetypes - types describing part of remote root buffer referenced for each remote rank
60: Level: developer
62: .seealso: PetscSFSetGraph(), PetscSFView()
63: @*/
64: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
65: {
66: PetscSF_Window *w = (PetscSF_Window*)sf->data;
67: PetscSFDataLink link;
68: PetscInt i,nranks;
69: const PetscInt *roffset,*rmine,*rremote;
70: const PetscMPIInt *ranks;
72: /* Look for types in cache */
73: for (link=w->link; link; link=link->next) {
74: PetscBool match;
75: MPIPetsc_Type_compare(unit,link->unit,&match);
76: if (match) {
77: *localtypes = link->mine;
78: *remotetypes = link->remote;
79: return 0;
80: }
81: }
83: /* Create new composite types for each send rank */
84: PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
85: PetscNew(&link);
86: MPI_Type_dup(unit,&link->unit);
87: PetscMalloc2(nranks,&link->mine,nranks,&link->remote);
88: for (i=0; i<nranks; i++) {
89: PetscInt rcount = roffset[i+1] - roffset[i];
90: PetscMPIInt *rmine,*rremote;
91: #if !defined(PETSC_USE_64BIT_INDICES)
92: rmine = sf->rmine + sf->roffset[i];
93: rremote = sf->rremote + sf->roffset[i];
94: #else
95: PetscInt j;
96: PetscMalloc2(rcount,&rmine,rcount,&rremote);
97: for (j=0; j<rcount; j++) {
98: PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
99: PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
100: }
101: #endif
103: MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
104: MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
105: #if defined(PETSC_USE_64BIT_INDICES)
106: PetscFree2(rmine,rremote);
107: #endif
108: MPI_Type_commit(&link->mine[i]);
109: MPI_Type_commit(&link->remote[i]);
110: }
111: link->next = w->link;
112: w->link = link;
114: *localtypes = link->mine;
115: *remotetypes = link->remote;
116: return 0;
117: }
119: /*@C
120: PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
122: Logically Collective
124: Input Parameters:
125: + sf - star forest for communication
126: - flavor - flavor type
128: Options Database Key:
129: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
131: Level: advanced
Notes: Window reuse follows these rules:
135: PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
137: PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
138: for i=1 to K
139: PetscSFOperationBegin(rootdata1,leafdata_whatever);
140: PetscSFOperationEnd(rootdata1,leafdata_whatever);
141: ...
142: PetscSFOperationBegin(rootdataN,leafdata_whatever);
143: PetscSFOperationEnd(rootdataN,leafdata_whatever);
144: endfor
145: The following pattern will instead raise an error
146: PetscSFOperationBegin(rootdata1,leafdata_whatever);
147: PetscSFOperationEnd(rootdata1,leafdata_whatever);
148: PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
149: PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
151: PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use
153: PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
155: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
156: @*/
/* Public setter: dispatches to the composed PetscSFWindowSetFlavorType_C
   method if the SF type provides one (PetscTryMethod is a no-op otherwise,
   so this is safe to call on non-window SFs). */
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));
  return 0;
}
165: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
166: {
167: PetscSF_Window *w = (PetscSF_Window*)sf->data;
169: w->flavor = flavor;
170: return 0;
171: }
173: /*@C
174: PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
176: Logically Collective
178: Input Parameter:
179: . sf - star forest for communication
181: Output Parameter:
182: . flavor - flavor type
184: Level: advanced
186: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
187: @*/
/* Public getter: dispatches to the composed PetscSFWindowGetFlavorType_C
   method (PetscUseMethod errors if the SF type did not compose it). */
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));
  return 0;
}
196: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
197: {
198: PetscSF_Window *w = (PetscSF_Window*)sf->data;
200: *flavor = w->flavor;
201: return 0;
202: }
204: /*@C
205: PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
207: Logically Collective
209: Input Parameters:
210: + sf - star forest for communication
211: - sync - synchronization type
213: Options Database Key:
214: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
216: Level: advanced
218: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
219: @*/
/* Public setter: dispatches to the composed PetscSFWindowSetSyncType_C
   method if present (no-op on non-window SF types). */
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
  return 0;
}
228: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
229: {
230: PetscSF_Window *w = (PetscSF_Window*)sf->data;
232: w->sync = sync;
233: return 0;
234: }
236: /*@C
237: PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
239: Logically Collective
241: Input Parameter:
242: . sf - star forest for communication
244: Output Parameter:
245: . sync - synchronization type
247: Level: advanced
249: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
250: @*/
/* Public getter: dispatches to the composed PetscSFWindowGetSyncType_C
   method (PetscUseMethod errors if the SF type did not compose it). */
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
  return 0;
}
259: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
260: {
261: PetscSF_Window *w = (PetscSF_Window*)sf->data;
263: *sync = w->sync;
264: return 0;
265: }
267: /*@C
268: PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
270: Logically Collective
272: Input Parameters:
273: + sf - star forest for communication
274: - info - MPI_Info handle
276: Level: advanced
278: Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
280: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
281: @*/
/* Public setter: dispatches to the composed PetscSFWindowSetInfo_C method if
   present. The implementation duplicates the handle (see manpage above). */
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));
  return 0;
}
289: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
290: {
291: PetscSF_Window *w = (PetscSF_Window*)sf->data;
293: if (w->info != MPI_INFO_NULL) {
294: MPI_Info_free(&w->info);
295: }
296: if (info != MPI_INFO_NULL) {
297: MPI_Info_dup(info,&w->info);
298: }
299: return 0;
300: }
302: /*@C
303: PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
305: Logically Collective
307: Input Parameter:
308: . sf - star forest for communication
310: Output Parameter:
311: . info - MPI_Info handle
313: Level: advanced
Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL
317: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
318: @*/
/* Public getter: dispatches to the composed PetscSFWindowGetInfo_C method
   (PetscUseMethod errors if the SF type did not compose it). */
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));
  return 0;
}
327: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
328: {
329: PetscSF_Window *w = (PetscSF_Window*)sf->data;
331: *info = w->info;
332: return 0;
333: }
335: /*
336: PetscSFGetWindow - Get a window for use with a given data type
338: Collective on PetscSF
340: Input Parameters:
341: + sf - star forest
342: . unit - data type
343: . array - array to be sent
344: . sync - type of synchronization PetscSFWindowSyncType
345: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
346: . fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
347: . postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
348: - startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
350: Output Parameters:
+ target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
. reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
353: - win - window
355: Level: developer
356: .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
357: */
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  MPI_Aint lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  MPI_Aint winaddr;
  PetscInt nranks;
#endif
  PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
  PetscBool dummy[2];
  MPI_Aint wsize;

  /* bytes: extent of one unit; the window holds sf->nroots of them */
  MPI_Type_get_extent(unit,&lb,&bytes);
  MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
  /* Only the CREATE flavor makes a fresh window every time; all other flavors
     first scan the list of existing windows for a reusable one. */
  if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
  for (link=w->wins; reuse && link; link=link->next) {
    PetscBool winok = PETSC_FALSE;
    if (w->flavor != link->flavor) continue;
    switch (w->flavor) {
    case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
      if (array == link->addr) {
        if (PetscDefined(USE_DEBUG)) {
          /* Collective consistency check: both branches reduce the same pair of
             flags so every process participates in the same two Allreduces */
          dummy[0] = PETSC_TRUE;
          dummy[1] = PETSC_TRUE;
          MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
          MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
        }
        winok = PETSC_TRUE;
        link->paddr = array;
      } else if (PetscDefined(USE_DEBUG)) {
        dummy[0] = PETSC_FALSE;
        dummy[1] = PETSC_FALSE;
        MPI_Allreduce(MPI_IN_PLACE,dummy ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
        MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
      }
      break;
    case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
    case PETSCSF_WINDOW_FLAVOR_SHARED:
      if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
        /* MPI-owned memory: must copy user data in (below) before exposing it */
        update = PETSC_TRUE;
        link->paddr = array;
        winok = PETSC_TRUE;
      }
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
    }
    if (winok) {
      *win = link->win;
      PetscInfo(sf,"Reusing window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
      goto found;
    }
  }

  /* No reusable window: create one of the requested flavor */
  wsize = (MPI_Aint)bytes*sf->nroots;
  PetscNew(&link);
  link->bytes           = bytes;
  link->next            = w->wins;
  link->flavor          = w->flavor;
  link->dyn_target_addr = NULL;
  link->reqs            = NULL;
  w->wins               = link;
  if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
    PetscInt i;

    /* One request slot per remote rank, used by MPI_Rget in BcastBegin */
    PetscMalloc1(sf->nranks,&link->reqs);
    for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
  }
  switch (w->flavor) {
  case PETSCSF_WINDOW_FLAVOR_CREATE:
    MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);
    link->addr  = array;
    link->paddr = array;
    break;
#if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
  case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
    MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
    MPI_Win_attach(link->win,wsize ? array : (void*)dummy,wsize);
#else
    MPI_Win_attach(link->win,array,wsize);
#endif
    link->addr  = array;
    link->paddr = array;

    /* Each process has a different base address for the attached memory, so
       broadcast our address to the leaf ranks via the helper SF; the received
       per-rank addresses become the target displacements for RMA calls. */
    PetscSFSetUp(w->dynsf);
    PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);
    PetscMalloc1(nranks,&link->dyn_target_addr);
    MPI_Get_address(array,&winaddr);
    PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
    PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
    break;
  case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
    MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
#if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
  case PETSCSF_WINDOW_FLAVOR_SHARED:
    MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
    update = PETSC_TRUE;
    link->paddr = array;
    break;
#endif
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
  }
  PetscInfo(sf,"New window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
  *win = link->win;

found:

  if (target_disp) *target_disp = link->dyn_target_addr;
  if (reqs) *reqs = link->reqs;
  if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
    PetscMPIInt rank;

    /* Copy user data into the MPI-allocated window memory before the epoch */
    MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);
    PetscMemcpy(link->addr,array,sf->nroots*bytes);
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_unlock(rank,*win);
      MPI_Win_fence(0,*win);
    }
  }
  link->inuse = PETSC_TRUE;
  link->epoch = epoch;
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert,*win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         So, we do not even issue the corresponding start and post calls
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         start(outgroup) has a matching post(ingroup)
         and this is guaranteed by PetscSF
      */
      PetscSFGetGroups(sf,&ingroup,&outgroup);
      MPI_Group_size(ingroup,&isize);
      MPI_Group_size(outgroup,&osize);
      if (isize) MPI_Win_post(ingroup,postassert,*win);
      if (osize) MPI_Win_start(outgroup,startassert,*win);
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  return 0;
}
522: /*
523: PetscSFFindWindow - Finds a window that is already in use
525: Not Collective
527: Input Parameters:
528: + sf - star forest
529: . unit - data type
530: - array - array with which the window is associated
532: Output Parameters:
533: + win - window
534: - reqs - outstanding requests associated to the window
536: Level: developer
538: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
539: */
540: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
541: {
542: PetscSF_Window *w = (PetscSF_Window*)sf->data;
543: PetscSFWinLink link;
545: *win = MPI_WIN_NULL;
546: for (link=w->wins; link; link=link->next) {
547: if (array == link->paddr) {
549: PetscInfo(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
550: *win = link->win;
551: *reqs = link->reqs;
552: return 0;
553: }
554: }
555: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
556: }
558: /*
559: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
561: Collective
563: Input Parameters:
564: + sf - star forest
565: . unit - data type
566: . array - array associated with window
567: . sync - type of synchronization PetscSFWindowSyncType
568: . epoch - close an epoch, must match argument to PetscSFGetWindow()
569: . update - if we have to update the local window array
570: - win - window
572: Level: developer
574: .seealso: PetscSFFindWindow()
575: */
576: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
577: {
578: PetscSF_Window *w = (PetscSF_Window*)sf->data;
579: PetscSFWinLink *p,link;
580: PetscBool reuse = PETSC_FALSE;
581: PetscSFWindowFlavorType flavor;
582: void* laddr;
583: size_t bytes;
585: for (p=&w->wins; *p; p=&(*p)->next) {
586: link = *p;
587: if (*win == link->win) {
589: if (epoch != link->epoch) {
591: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
592: }
593: laddr = link->addr;
594: flavor = link->flavor;
595: bytes = link->bytes;
596: if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
597: else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
598: goto found;
599: }
600: }
601: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
603: found:
604: PetscInfo(sf,"Window %" PETSC_MPI_WIN_FMT " of flavor %d for comm %" PETSC_MPI_COMM_FMT "\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
605: if (epoch) {
606: switch (sync) {
607: case PETSCSF_WINDOW_SYNC_FENCE:
608: MPI_Win_fence(fenceassert,*win);
609: break;
610: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
611: break;
612: case PETSCSF_WINDOW_SYNC_ACTIVE: {
613: MPI_Group ingroup,outgroup;
614: PetscMPIInt isize,osize;
616: /* OpenMPI 4.0.2 with btl=wader does not like calling
617: - MPI_Win_complete when ogroup is empty
618: - MPI_Win_wait when igroup is empty
619: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
620: - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
621: - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
622: */
623: PetscSFGetGroups(sf,&ingroup,&outgroup);
624: MPI_Group_size(ingroup,&isize);
625: MPI_Group_size(outgroup,&osize);
626: if (osize) MPI_Win_complete(*win);
627: if (isize) MPI_Win_wait(*win);
628: } break;
629: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
630: }
631: }
632: if (update) {
633: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
634: MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);
635: }
636: PetscMemcpy(array,laddr,sf->nroots*bytes);
637: }
638: link->epoch = PETSC_FALSE;
639: link->inuse = PETSC_FALSE;
640: link->paddr = NULL;
641: if (!reuse) {
642: PetscFree(link->dyn_target_addr);
643: PetscFree(link->reqs);
644: MPI_Win_free(&link->win);
645: PetscFree(link);
646: *win = MPI_WIN_NULL;
647: }
648: return 0;
649: }
651: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
652: {
653: PetscSF_Window *w = (PetscSF_Window*)sf->data;
654: MPI_Group ingroup,outgroup;
656: PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);
657: if (!w->dynsf) {
658: PetscInt i;
659: PetscSFNode *remotes;
661: PetscMalloc1(sf->nranks,&remotes);
662: for (i=0;i<sf->nranks;i++) {
663: remotes[i].rank = sf->ranks[i];
664: remotes[i].index = 0;
665: }
666: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);
667: PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE); /* break recursion */
668: PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);
669: PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);
670: }
671: switch (w->sync) {
672: case PETSCSF_WINDOW_SYNC_ACTIVE:
673: PetscSFGetGroups(sf,&ingroup,&outgroup);
674: default:
675: break;
676: }
677: return 0;
678: }
680: static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
681: {
682: PetscSF_Window *w = (PetscSF_Window*)sf->data;
683: PetscSFWindowFlavorType flavor = w->flavor;
685: PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");
686: PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
687: PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);
688: PetscSFWindowSetFlavorType(sf,flavor);
689: PetscOptionsTail();
690: return 0;
691: }
693: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
694: {
695: PetscSF_Window *w = (PetscSF_Window*)sf->data;
696: PetscSFDataLink link,next;
697: PetscSFWinLink wlink,wnext;
698: PetscInt i;
700: for (link=w->link; link; link=next) {
701: next = link->next;
702: MPI_Type_free(&link->unit);
703: for (i=0; i<sf->nranks; i++) {
704: MPI_Type_free(&link->mine[i]);
705: MPI_Type_free(&link->remote[i]);
706: }
707: PetscFree2(link->mine,link->remote);
708: PetscFree(link);
709: }
710: w->link = NULL;
711: for (wlink=w->wins; wlink; wlink=wnext) {
712: wnext = wlink->next;
714: PetscFree(wlink->dyn_target_addr);
715: PetscFree(wlink->reqs);
716: MPI_Win_free(&wlink->win);
717: PetscFree(wlink);
718: }
719: w->wins = NULL;
720: PetscSFDestroy(&w->dynsf);
721: if (w->info != MPI_INFO_NULL) {
722: MPI_Info_free(&w->info);
723: }
724: return 0;
725: }
/* Destroy the window SF: reset cached state, free the implementation data,
   and remove the composed type-specific methods installed by PetscSFCreate_Window. */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscSFReset_Window(sf);
  PetscFree(sf->data);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);
  return 0;
}
/* ASCII viewer for the window SF: prints flavor/sync settings; with
   PETSC_VIEWER_ASCII_INFO_DETAIL, also dumps the MPI_Info key/value pairs. */
static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscBool iascii;
  PetscViewerFormat format;

  PetscViewerGetFormat(viewer,&format);
  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      if (w->info != MPI_INFO_NULL) {
        PetscMPIInt k,nkeys;
        char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];

        MPI_Info_get_nkeys(w->info,&nkeys);
        PetscViewerASCIIPrintf(viewer,"  current info with %d keys. Ordered key-value pairs follow:\n",nkeys);
        for (k = 0; k < nkeys; k++) {
          PetscMPIInt flag;

          MPI_Info_get_nthkey(w->info,k,key);
          MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);
          /* flag is not checked here; presumably the key returned by
             MPI_Info_get_nthkey is always present -- NOTE(review): confirm */
          PetscViewerASCIIPrintf(viewer,"    %s = %s\n",key,value);
        }
      } else {
        PetscViewerASCIIPrintf(viewer,"  current info=MPI_INFO_NULL\n");
      }
    }
  }
  return 0;
}
773: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
774: {
775: PetscSF_Window *w = (PetscSF_Window*)sf->data;
776: PetscSFWindowSyncType synctype;
778: synctype = w->sync;
779: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
780: if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
781: PetscSFWindowSetSyncType(newsf,synctype);
782: PetscSFWindowSetFlavorType(newsf,w->flavor);
783: PetscSFWindowSetInfo(newsf,w->info);
784: return 0;
785: }
/* Begin root->leaf broadcast: acquire a window over rootdata and issue an
   MPI_Get/MPI_Rget toward each root rank using the cached composite datatypes.
   The op argument is not used here; completion happens in PetscSFBcastEnd_Window. */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Aint *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Request *reqs;
  MPI_Win win;

  PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  /* Opens the epoch (except for LOCK, where per-rank locks are taken below) */
  PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);
  for (i=0; i<nranks; i++) {
    /* target_disp is non-NULL only for the DYNAMIC flavor */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
      /* Unlock is issued in PetscSFBcastEnd_Window */
      MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);
#if defined(PETSC_HAVE_MPI_RGET)
      /* Request-based get lets BcastEnd wait on the requests directly */
      MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);
#else
      MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
#endif
    } else {
      MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
    }
  }
  return 0;
}
818: PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
819: {
820: PetscSF_Window *w = (PetscSF_Window*)sf->data;
821: MPI_Win win;
822: MPI_Request *reqs = NULL;
824: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
825: if (reqs) MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);
826: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
827: PetscInt i,nranks;
828: const PetscMPIInt *ranks;
830: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
831: for (i=0; i<nranks; i++) {
832: MPI_Win_unlock(ranks[i],win);
833: }
834: }
835: PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);
836: return 0;
837: }
/* Begin leaf->root reduction: acquire a window over rootdata and issue an
   MPI_Accumulate from each leaf segment, after translating the PETSc op to
   the MPI built-in it corresponds to (MPI_Accumulate requires built-in ops). */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Aint *target_disp;
  const MPI_Datatype *mine,*remote;
  MPI_Win win;

  PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  PetscSFWindowOpTranslate(&op);
  PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
  for (i=0; i<nranks; i++) {
    /* target_disp is non-NULL only for the DYNAMIC flavor */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

    /* LOCK sync: lock/accumulate/unlock per rank; the epoch itself is closed in ReduceEnd */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);
    MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i],win);
  }
  return 0;
}
862: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
863: {
864: PetscSF_Window *w = (PetscSF_Window*)sf->data;
865: MPI_Win win;
866: MPI_Request *reqs = NULL;
868: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
869: if (reqs) MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);
870: PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
871: return 0;
872: }
/* Begin fetch-and-op: atomically fetch current root values into leafupdate and
   accumulate leafdata into the roots. With MPI_Get_accumulate this is a single
   atomic call per rank; without it we emulate via exclusive lock + Get + Accumulate,
   forcing a freshly created (CREATE-flavor) window so locking is permitted. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint *target_disp;
  MPI_Win win;
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf;
#endif

  PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  PetscSFWindowOpTranslate(&op);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  /* No epoch: all synchronization is via per-rank locks below */
  PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);
#else
  PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
#endif
  for (i=0; i<nranks; i++) {
    /* target_disp is non-NULL only for the DYNAMIC flavor */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;

#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* Exclusive lock makes the Get+Accumulate pair atomic w.r.t. other ranks */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);
    MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);
    MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    MPI_Win_unlock(ranks[i],win);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);
    MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) MPI_Win_unlock(ranks[i],win);
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* Restore the user's requested flavor */
  w->flavor = oldf;
#endif
  return 0;
}
/* Complete fetch-and-op: wait on outstanding requests and restore the window,
   copying results back to rootdata (update=PETSC_TRUE). Arguments must mirror
   the sync/epoch choices made in PetscSFFetchAndOpBegin_Window for each
   PETSC_HAVE_MPI_GET_ACCUMULATE configuration. */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  MPI_Win win;
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#endif
  MPI_Request *reqs = NULL;

  PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
  if (reqs) MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);
#if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
#else
  /* Begin used SYNC_LOCK with no epoch; Restore must match */
  PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);
#endif
  return 0;
}
936: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
937: {
938: PetscSF_Window *w = (PetscSF_Window*)sf->data;
940: sf->ops->SetUp = PetscSFSetUp_Window;
941: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
942: sf->ops->Reset = PetscSFReset_Window;
943: sf->ops->Destroy = PetscSFDestroy_Window;
944: sf->ops->View = PetscSFView_Window;
945: sf->ops->Duplicate = PetscSFDuplicate_Window;
946: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
947: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
948: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
949: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
950: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
951: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
953: PetscNewLog(sf,&w);
954: sf->data = (void*)w;
955: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
956: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
957: w->info = MPI_INFO_NULL;
959: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
960: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
961: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);
962: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);
963: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);
964: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);
966: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
967: {
968: PetscBool ackbug = PETSC_FALSE;
969: PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
970: if (ackbug) {
971: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
972: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
973: }
974: #endif
975: return 0;
976: }