Actual source code: sfwindow.c
1: #include <petsc/private/sfimpl.h>
/* Handles to the nodes of the two singly linked lists kept by the window implementation:
   the cache of per-unit MPI datatypes and the list of MPI windows (both structs are defined below). */
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink *PetscSFWinLink;
/* Private data of the window-based PetscSF implementation; reached through sf->data */
typedef struct {
  PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */
  PetscSFWinLink wins; /* List of windows: those currently in use plus those kept around for reuse */
  PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
  PetscSF dynsf; /* Helper SF created in PetscSFSetUp_Window(); PetscSFGetWindow() uses it to broadcast window base addresses for the DYNAMIC flavor */
  MPI_Info info; /* Info handle passed to the MPI window creation calls; a duplicate of the handle given to PetscSFWindowSetInfo() */
} PetscSF_Window;
/* One cache entry of PetscSFWindowGetDataTypes(): the composite types built for a given unit type */
struct _n_PetscSFDataLink {
  MPI_Datatype unit; /* Duplicate (MPI_Type_dup) of the unit type this entry was built for */
  MPI_Datatype *mine; /* One indexed-block type per root rank, built from sf->rmine (local leaf buffer) */
  MPI_Datatype *remote; /* One indexed-block type per root rank, built from sf->rremote (remote root buffer) */
  PetscSFDataLink next; /* Next cache entry, NULL at the end of the list */
};
/* Bookkeeping for one MPI window handed out by PetscSFGetWindow() */
struct _n_PetscSFWinLink {
  PetscBool inuse; /* Set by PetscSFGetWindow(), cleared by PetscSFRestoreWindow() */
  size_t bytes; /* Extent of the unit type the window was created for */
  void *addr; /* Memory exposed by the window: the user array (CREATE, DYNAMIC) or the buffer returned by MPI (ALLOCATE, SHARED) */
  void *paddr; /* User root array currently associated with the window; reset to NULL on restore */
  MPI_Win win; /* The MPI window itself */
  MPI_Request *reqs; /* One request per root rank; allocated only when the window is created with LOCK synchronization, NULL otherwise */
  PetscSFWindowFlavorType flavor; /* Flavor the window was created with */
  MPI_Aint *dyn_target_addr; /* DYNAMIC flavor only: addresses received through dynsf, used as target displacements; NULL otherwise */
  PetscBool epoch; /* Whether PetscSFGetWindow() was asked to start an epoch on this window */
  PetscSFWinLink next; /* Next window in the list, NULL at the end */
};
/* Name tables for the two enums, as passed to PetscOptionsEnum() in PetscSFSetFromOptions_Window().
   Layout: the value names in enum order, then the enum type name, the common value prefix, and a terminating NULL. */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",NULL};
const char *const PetscSFWindowFlavorTypes[] = {"CREATE","DYNAMIC","ALLOCATE","SHARED","PetscSFWindowFlavorType","PETSCSF_WINDOW_FLAVOR_",NULL};
38: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
39: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40: {
42: if (*op == MPIU_SUM) *op = MPI_SUM;
43: else if (*op == MPIU_MAX) *op = MPI_MAX;
44: else if (*op == MPIU_MIN) *op = MPI_MIN;
45: return(0);
46: }
48: /*@C
49: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
51: Not Collective
53: Input Arguments:
54: + sf - star forest
55: - unit - data type for each node
57: Output Arguments:
58: + localtypes - types describing part of local leaf buffer referencing each remote rank
59: - remotetypes - types describing part of remote root buffer referenced for each remote rank
61: Level: developer
63: .seealso: PetscSFSetGraph(), PetscSFView()
64: @*/
65: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
66: {
67: PetscSF_Window *w = (PetscSF_Window*)sf->data;
68: PetscErrorCode ierr;
69: PetscSFDataLink link;
70: PetscInt i,nranks;
71: const PetscInt *roffset,*rmine,*rremote;
72: const PetscMPIInt *ranks;
75: /* Look for types in cache */
76: for (link=w->link; link; link=link->next) {
77: PetscBool match;
78: MPIPetsc_Type_compare(unit,link->unit,&match);
79: if (match) {
80: *localtypes = link->mine;
81: *remotetypes = link->remote;
82: return(0);
83: }
84: }
86: /* Create new composite types for each send rank */
87: PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
88: PetscNew(&link);
89: MPI_Type_dup(unit,&link->unit);
90: PetscMalloc2(nranks,&link->mine,nranks,&link->remote);
91: for (i=0; i<nranks; i++) {
92: PetscInt rcount = roffset[i+1] - roffset[i];
93: PetscMPIInt *rmine,*rremote;
94: #if !defined(PETSC_USE_64BIT_INDICES)
95: rmine = sf->rmine + sf->roffset[i];
96: rremote = sf->rremote + sf->roffset[i];
97: #else
98: PetscInt j;
99: PetscMalloc2(rcount,&rmine,rcount,&rremote);
100: for (j=0; j<rcount; j++) {
101: PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
102: PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
103: }
104: #endif
106: MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
107: MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
108: #if defined(PETSC_USE_64BIT_INDICES)
109: PetscFree2(rmine,rremote);
110: #endif
111: MPI_Type_commit(&link->mine[i]);
112: MPI_Type_commit(&link->remote[i]);
113: }
114: link->next = w->link;
115: w->link = link;
117: *localtypes = link->mine;
118: *remotetypes = link->remote;
119: return(0);
120: }
122: /*@C
123: PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
125: Logically Collective
127: Input Arguments:
128: + sf - star forest for communication
129: - flavor - flavor type
131: Options Database Key:
132: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
134: Level: advanced
   Notes: Window reuse follows these rules:
138: PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
140: PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141: for i=1 to K
142: PetscSFOperationBegin(rootdata1,leafdata_whatever);
143: PetscSFOperationEnd(rootdata1,leafdata_whatever);
144: ...
145: PetscSFOperationBegin(rootdataN,leafdata_whatever);
146: PetscSFOperationEnd(rootdataN,leafdata_whatever);
147: endfor
148: The following pattern will instead raise an error
149: PetscSFOperationBegin(rootdata1,leafdata_whatever);
150: PetscSFOperationEnd(rootdata1,leafdata_whatever);
151: PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152: PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
     PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window that fits the data and is not in use
     PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reuse policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
158: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
159: @*/
PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowSetFlavorType_C"
     (PetscSFWindowSetFlavorType_Window() for the window implementation).
     NOTE(review): PetscTryMethod presumably does nothing when no such function is composed - confirm. */
  PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));
  return(0);
}
171: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172: {
173: PetscSF_Window *w = (PetscSF_Window*)sf->data;
176: w->flavor = flavor;
177: return(0);
178: }
180: /*@C
181: PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
183: Logically Collective
185: Input Argument:
186: . sf - star forest for communication
188: Output Argument:
189: . flavor - flavor type
191: Level: advanced
193: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
194: @*/
PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowGetFlavorType_C"
     (PetscSFWindowGetFlavorType_Window() for the window implementation).
     NOTE(review): PetscUseMethod presumably raises an error when no such function is composed - confirm. */
  PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));
  return(0);
}
206: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207: {
208: PetscSF_Window *w = (PetscSF_Window*)sf->data;
211: *flavor = w->flavor;
212: return(0);
213: }
215: /*@C
216: PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
218: Logically Collective
220: Input Arguments:
221: + sf - star forest for communication
222: - sync - synchronization type
224: Options Database Key:
225: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
227: Level: advanced
229: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
230: @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowSetSyncType_C"
     (PetscSFWindowSetSyncType_Window() for the window implementation).
     NOTE(review): PetscTryMethod presumably does nothing when no such function is composed - confirm. */
  PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
  return(0);
}
242: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243: {
244: PetscSF_Window *w = (PetscSF_Window*)sf->data;
247: w->sync = sync;
248: return(0);
249: }
251: /*@C
252: PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
254: Logically Collective
256: Input Argument:
257: . sf - star forest for communication
259: Output Argument:
260: . sync - synchronization type
262: Level: advanced
264: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
265: @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowGetSyncType_C"
     (PetscSFWindowGetSyncType_Window() for the window implementation).
     NOTE(review): PetscUseMethod presumably raises an error when no such function is composed - confirm. */
  PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
  return(0);
}
277: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278: {
279: PetscSF_Window *w = (PetscSF_Window*)sf->data;
282: *sync = w->sync;
283: return(0);
284: }
286: /*@C
287: PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
289: Logically Collective
   Input Arguments:
292: + sf - star forest for communication
293: - info - MPI_Info handle
295: Level: advanced
297: Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
299: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
300: @*/
PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowSetInfo_C"
     (PetscSFWindowSetInfo_Window() for the window implementation).
     NOTE(review): PetscTryMethod presumably does nothing when no such function is composed - confirm. */
  PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));
  return(0);
}
311: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312: {
313: PetscSF_Window *w = (PetscSF_Window*)sf->data;
317: if (w->info != MPI_INFO_NULL) {
318: MPI_Info_free(&w->info);
319: }
320: if (info != MPI_INFO_NULL) {
321: MPI_Info_dup(info,&w->info);
322: }
323: return(0);
324: }
326: /*@C
327: PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
329: Logically Collective
331: Input Argument:
332: . sf - star forest for communication
334: Output Argument:
335: . info - MPI_Info handle
337: Level: advanced
   Notes: if PetscSFWindowSetInfo() has not been called, this returns MPI_INFO_NULL
341: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
342: @*/
PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
{
  /* Dispatch to the function composed on sf under "PetscSFWindowGetInfo_C"
     (PetscSFWindowGetInfo_Window() for the window implementation).
     NOTE(review): PetscUseMethod presumably raises an error when no such function is composed - confirm. */
  PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));
  return(0);
}
354: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355: {
356: PetscSF_Window *w = (PetscSF_Window*)sf->data;
359: *info = w->info;
360: return(0);
361: }
363: /*
364: PetscSFGetWindow - Get a window for use with a given data type
366: Collective on PetscSF
368: Input Arguments:
369: + sf - star forest
370: . unit - data type
371: . array - array to be sent
372: . sync - type of synchronization PetscSFWindowSyncType
373: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
374: . fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
375: . postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
376: - startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
378: Output Arguments:
+  target_disp - target_disp argument for RMA calls (significant for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
.  reqs - array of requests (significant for sync == PETSCSF_WINDOW_SYNC_LOCK only)
381: - win - window
383: Level: developer
384: .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
385: */
386: static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
387: {
388: PetscSF_Window *w = (PetscSF_Window*)sf->data;
390: MPI_Aint lb,lb_true,bytes,bytes_true;
391: PetscSFWinLink link;
392: MPI_Aint winaddr;
393: PetscInt nranks;
394: PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
395: PetscBool dummy[2];
396: MPI_Aint wsize;
399: MPI_Type_get_extent(unit,&lb,&bytes);
400: MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
401: if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
402: if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
403: if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
404: for (link=w->wins; reuse && link; link=link->next) {
405: PetscBool winok = PETSC_FALSE;
406: if (w->flavor != link->flavor) continue;
407: switch (w->flavor) {
408: case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
409: if (array == link->addr) {
410: if (PetscDefined(USE_DEBUG)) {
411: dummy[0] = PETSC_TRUE;
412: dummy[1] = PETSC_TRUE;
413: MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
414: MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
415: if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
416: }
417: if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
418: if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
419: winok = PETSC_TRUE;
420: link->paddr = array;
421: } else if (PetscDefined(USE_DEBUG)) {
422: dummy[0] = PETSC_FALSE;
423: dummy[1] = PETSC_FALSE;
424: MPI_Allreduce(MPI_IN_PLACE,dummy ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
425: MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
426: if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
427: }
428: break;
429: case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
430: case PETSCSF_WINDOW_FLAVOR_SHARED:
431: if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
432: update = PETSC_TRUE;
433: link->paddr = array;
434: winok = PETSC_TRUE;
435: }
436: break;
437: default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
438: }
439: if (winok) {
440: *win = link->win;
441: PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
442: goto found;
443: }
444: }
446: wsize = (MPI_Aint)bytes*sf->nroots;
447: PetscNew(&link);
448: link->bytes = bytes;
449: link->next = w->wins;
450: link->flavor = w->flavor;
451: link->dyn_target_addr = NULL;
452: link->reqs = NULL;
453: w->wins = link;
454: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
455: PetscInt i;
457: PetscMalloc1(sf->nranks,&link->reqs);
458: for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
459: }
460: switch (w->flavor) {
461: case PETSCSF_WINDOW_FLAVOR_CREATE:
462: MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);
463: link->addr = array;
464: link->paddr = array;
465: break;
466: case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
467: MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);
468: #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
469: MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);
470: #else
471: MPI_Win_attach(link->win,array,wsize);
472: #endif
473: link->addr = array;
474: link->paddr = array;
475: if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
476: PetscSFSetUp(w->dynsf);
477: PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);
478: PetscMalloc1(nranks,&link->dyn_target_addr);
479: MPI_Get_address(array,&winaddr);
480: PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
481: PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
482: break;
483: case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
484: MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
485: update = PETSC_TRUE;
486: link->paddr = array;
487: break;
488: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
489: case PETSCSF_WINDOW_FLAVOR_SHARED:
490: MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
491: update = PETSC_TRUE;
492: link->paddr = array;
493: break;
494: #endif
495: default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
496: }
497: PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
498: *win = link->win;
500: found:
502: if (target_disp) *target_disp = link->dyn_target_addr;
503: if (reqs) *reqs = link->reqs;
504: if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
505: PetscMPIInt rank;
507: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
508: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);}
509: PetscMemcpy(link->addr,array,sf->nroots*bytes);
510: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
511: MPI_Win_unlock(rank,*win);
512: MPI_Win_fence(0,*win);
513: }
514: }
515: link->inuse = PETSC_TRUE;
516: link->epoch = epoch;
517: if (epoch) {
518: switch (sync) {
519: case PETSCSF_WINDOW_SYNC_FENCE:
520: MPI_Win_fence(fenceassert,*win);
521: break;
522: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
523: break;
524: case PETSCSF_WINDOW_SYNC_ACTIVE: {
525: MPI_Group ingroup,outgroup;
526: PetscMPIInt isize,osize;
528: /* OpenMPI 4.0.2 with btl=vader does not like calling
529: - MPI_Win_complete when ogroup is empty
530: - MPI_Win_wait when igroup is empty
531: So, we do not even issue the corresponding start and post calls
532: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
533: start(outgroup) has a matching post(ingroup)
534: and this is guaranteed by PetscSF
535: */
536: PetscSFGetGroups(sf,&ingroup,&outgroup);
537: MPI_Group_size(ingroup,&isize);
538: MPI_Group_size(outgroup,&osize);
539: if (isize) {MPI_Win_post(ingroup,postassert,*win);}
540: if (osize) {MPI_Win_start(outgroup,startassert,*win);}
541: } break;
542: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
543: }
544: }
545: return(0);
546: }
548: /*
549: PetscSFFindWindow - Finds a window that is already in use
551: Not Collective
553: Input Arguments:
554: + sf - star forest
555: . unit - data type
556: - array - array with which the window is associated
558: Output Arguments:
559: + win - window
560: - reqs - outstanding requests associated to the window
562: Level: developer
564: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
565: */
566: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
567: {
568: PetscSF_Window *w = (PetscSF_Window*)sf->data;
569: PetscSFWinLink link;
573: *win = MPI_WIN_NULL;
574: for (link=w->wins; link; link=link->next) {
575: if (array == link->paddr) {
576: PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
577: *win = link->win;
578: *reqs = link->reqs;
579: return(0);
580: }
581: }
582: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
583: }
585: /*
586: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
588: Collective
590: Input Arguments:
591: + sf - star forest
592: . unit - data type
593: . array - array associated with window
594: . sync - type of synchronization PetscSFWindowSyncType
595: . epoch - close an epoch, must match argument to PetscSFGetWindow()
596: . update - if we have to update the local window array
597: - win - window
599: Level: developer
601: .seealso: PetscSFFindWindow()
602: */
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFWinLink *p,link;
  PetscBool reuse = PETSC_FALSE;
  PetscSFWindowFlavorType flavor;
  void* laddr;
  size_t bytes;
  /* Locate the list entry owning *win; p tracks the pointer that refers to it so it can be unlinked in place */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      /* Snapshot what is needed after the entry may have been unlinked */
      laddr = link->addr;
      flavor = link->flavor;
      bytes = link->bytes;
      /* Only CREATE windows are destroyed on restore; every other flavor stays in the list for reuse.
         A CREATE window exposes the user array directly, so there is nothing to copy back */
      if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
      else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
  /* Close the epoch opened by PetscSFGetWindow(), mirroring its synchronization calls */
  if (epoch) {
    switch (sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert,*win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group ingroup,outgroup;
      PetscMPIInt isize,osize;

      /* OpenMPI 4.0.2 with btl=vader does not like calling
         - MPI_Win_complete when ogroup is empty
         - MPI_Win_wait when igroup is empty
         The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
         - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
         - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
      */
      PetscSFGetGroups(sf,&ingroup,&outgroup);
      MPI_Group_size(ingroup,&isize);
      MPI_Group_size(outgroup,&osize);
      if (osize) {MPI_Win_complete(*win);}
      if (isize) {MPI_Win_wait(*win);}
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  /* Copy the window memory back into the user's root array when the two are distinct buffers */
  if (update) {
    if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
      MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);
    }
    PetscMemcpy(array,laddr,sf->nroots*bytes);
  }
  /* Mark the window idle; paddr is cleared so the entry no longer refers to the user array */
  link->epoch = PETSC_FALSE;
  link->inuse = PETSC_FALSE;
  link->paddr = NULL;
  /* A window that is not kept for reuse has already been unlinked above: release everything it owns */
  if (!reuse) {
    PetscFree(link->dyn_target_addr);
    PetscFree(link->reqs);
    MPI_Win_free(&link->win);
    PetscFree(link);
    *win = MPI_WIN_NULL;
  }
  return(0);
}
680: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
681: {
682: PetscSF_Window *w = (PetscSF_Window*)sf->data;
684: MPI_Group ingroup,outgroup;
687: PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);
688: if (!w->dynsf) {
689: PetscInt i;
690: PetscSFNode *remotes;
692: PetscMalloc1(sf->nranks,&remotes);
693: for (i=0;i<sf->nranks;i++) {
694: remotes[i].rank = sf->ranks[i];
695: remotes[i].index = 0;
696: }
697: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);
698: PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE); /* break recursion */
699: PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);
700: PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);
701: }
702: switch (w->sync) {
703: case PETSCSF_WINDOW_SYNC_ACTIVE:
704: PetscSFGetGroups(sf,&ingroup,&outgroup);
705: default:
706: break;
707: }
708: return(0);
709: }
/* Read the window-specific options -sf_window_sync and -sf_window_flavor */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFWindowFlavorType flavor = w->flavor; /* current value serves as the default shown to the user */
  PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");
  /* the synchronization type is written straight into the implementation data ... */
  PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
  /* ... whereas the flavor is read into a local and applied through the public setter */
  PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);
  PetscSFWindowSetFlavorType(sf,flavor);
  PetscOptionsTail();
  return(0);
}
726: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
727: {
728: PetscSF_Window *w = (PetscSF_Window*)sf->data;
729: PetscErrorCode ierr;
730: PetscSFDataLink link,next;
731: PetscSFWinLink wlink,wnext;
732: PetscInt i;
735: for (link=w->link; link; link=next) {
736: next = link->next;
737: MPI_Type_free(&link->unit);
738: for (i=0; i<sf->nranks; i++) {
739: MPI_Type_free(&link->mine[i]);
740: MPI_Type_free(&link->remote[i]);
741: }
742: PetscFree2(link->mine,link->remote);
743: PetscFree(link);
744: }
745: w->link = NULL;
746: for (wlink=w->wins; wlink; wlink=wnext) {
747: wnext = wlink->next;
748: if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
749: PetscFree(wlink->dyn_target_addr);
750: PetscFree(wlink->reqs);
751: MPI_Win_free(&wlink->win);
752: PetscFree(wlink);
753: }
754: w->wins = NULL;
755: PetscSFDestroy(&w->dynsf);
756: if (w->info != MPI_INFO_NULL) {
757: MPI_Info_free(&w->info);
758: }
759: return(0);
760: }
762: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
763: {
767: PetscSFReset_Window(sf);
768: PetscFree(sf->data);
769: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
770: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
771: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);
772: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);
773: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);
774: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);
775: return(0);
776: }
778: static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
779: {
780: PetscSF_Window *w = (PetscSF_Window*)sf->data;
781: PetscErrorCode ierr;
782: PetscBool iascii;
783: PetscViewerFormat format;
786: PetscViewerGetFormat(viewer,&format);
787: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
788: if (iascii) {
789: PetscViewerASCIIPrintf(viewer," current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
790: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
791: if (w->info != MPI_INFO_NULL) {
792: PetscMPIInt k,nkeys;
793: char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
795: MPI_Info_get_nkeys(w->info,&nkeys);
796: PetscViewerASCIIPrintf(viewer," current info with %d keys. Ordered key-value pairs follow:\n",nkeys);
797: for (k = 0; k < nkeys; k++) {
798: PetscMPIInt flag;
800: MPI_Info_get_nthkey(w->info,k,key);
801: MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);
802: if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
803: PetscViewerASCIIPrintf(viewer," %s = %s\n",key,value);
804: }
805: } else {
806: PetscViewerASCIIPrintf(viewer," current info=MPI_INFO_NULL\n");
807: }
808: }
809: }
810: return(0);
811: }
813: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
814: {
815: PetscSF_Window *w = (PetscSF_Window*)sf->data;
816: PetscErrorCode ierr;
817: PetscSFWindowSyncType synctype;
820: synctype = w->sync;
821: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
822: if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
823: PetscSFWindowSetSyncType(newsf,synctype);
824: PetscSFWindowSetFlavorType(newsf,w->flavor);
825: PetscSFWindowSetInfo(newsf,w->info);
826: return(0);
827: }
829: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
830: {
831: PetscSF_Window *w = (PetscSF_Window*)sf->data;
832: PetscErrorCode ierr;
833: PetscInt i,nranks;
834: const PetscMPIInt *ranks;
835: const MPI_Aint *target_disp;
836: const MPI_Datatype *mine,*remote;
837: MPI_Request *reqs;
838: MPI_Win win;
841: if (op != MPI_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
842: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
843: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
844: PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);
845: for (i=0; i<nranks; i++) {
846: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
848: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
849: MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);
850: #if defined(PETSC_HAVE_MPI_RGET)
851: MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);
852: #else
853: MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
854: #endif
855: } else {
856: MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
857: }
858: }
859: return(0);
860: }
862: PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
863: {
864: PetscSF_Window *w = (PetscSF_Window*)sf->data;
866: MPI_Win win;
867: MPI_Request *reqs = NULL;
870: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
871: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
872: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
873: PetscInt i,nranks;
874: const PetscMPIInt *ranks;
876: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
877: for (i=0; i<nranks; i++) {
878: MPI_Win_unlock(ranks[i],win);
879: }
880: }
881: PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);
882: return(0);
883: }
885: PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
886: {
887: PetscSF_Window *w = (PetscSF_Window*)sf->data;
888: PetscErrorCode ierr;
889: PetscInt i,nranks;
890: const PetscMPIInt *ranks;
891: const MPI_Aint *target_disp;
892: const MPI_Datatype *mine,*remote;
893: MPI_Win win;
896: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
897: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
898: PetscSFWindowOpTranslate(&op);
899: PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
900: for (i=0; i<nranks; i++) {
901: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
903: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
904: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
905: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
906: }
907: return(0);
908: }
910: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
911: {
912: PetscSF_Window *w = (PetscSF_Window*)sf->data;
914: MPI_Win win;
915: MPI_Request *reqs = NULL;
918: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
919: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
920: PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
921: return(0);
922: }
/* Start a fetch-and-op: each leaf receives the current root value in leafupdate and its leafdata is
   combined into the root with op. Uses MPI_Get_accumulate when available; otherwise an exclusive
   lock around a get followed by an accumulate. */
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Datatype *mine,*remote;
  const MPI_Aint *target_disp;
  MPI_Win win;
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  PetscSFWindowFlavorType oldf; /* flavor to put back once the temporary CREATE window has been obtained */
#endif
  PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  PetscSFWindowOpTranslate(&op);
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  /* FetchAndOp without MPI_Get_Accumulate requires locking.
     we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
  oldf = w->flavor;
  w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
  /* no epoch is started here (epoch == PETSC_FALSE): synchronization is done by the per-rank locks below */
  PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);
#else
  PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
#endif
  for (i=0; i<nranks; i++) {
    /* target_disp is NULL unless the window has per-rank target addresses (DYNAMIC flavor) */
    MPI_Aint tdp = target_disp ? target_disp[i] : 0;
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
    /* exclusive lock so that the get and the accumulate are not interleaved with other processes' updates */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);
    MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);
    MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    MPI_Win_unlock(ranks[i],win);
#else
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);}
    MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
#endif
  }
#if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
  w->flavor = oldf;
#endif
  return(0);
}
970: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
971: {
973: MPI_Win win;
974: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
975: PetscSF_Window *w = (PetscSF_Window*)sf->data;
976: #endif
977: MPI_Request *reqs = NULL;
980: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
981: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
982: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
983: PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
984: #else
985: PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);
986: #endif
987: return(0);
988: }
990: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
991: {
992: PetscSF_Window *w = (PetscSF_Window*)sf->data;
996: sf->ops->SetUp = PetscSFSetUp_Window;
997: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
998: sf->ops->Reset = PetscSFReset_Window;
999: sf->ops->Destroy = PetscSFDestroy_Window;
1000: sf->ops->View = PetscSFView_Window;
1001: sf->ops->Duplicate = PetscSFDuplicate_Window;
1002: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
1003: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
1004: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
1005: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
1006: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1007: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
1009: PetscNewLog(sf,&w);
1010: sf->data = (void*)w;
1011: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
1012: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1013: w->info = MPI_INFO_NULL;
1015: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
1016: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
1017: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);
1018: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);
1019: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);
1020: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);
1022: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1023: {
1024: PetscBool ackbug = PETSC_FALSE;
1025: PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
1026: if (ackbug) {
1027: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
1028: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1029: }
1030: #endif
1031: return(0);
1032: }