Actual source code: sfwindow.c
petsc-3.9.4 2018-09-11
1: #include <petsc/private/sfimpl.h>
3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4: typedef struct _n_PetscSFWinLink *PetscSFWinLink;
6: typedef struct {
7: PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
8: PetscSFDataLink link; /* List of MPI data types and windows, lazily constructed for each data type */
9: PetscSFWinLink wins; /* List of active windows */
10: } PetscSF_Window;
12: struct _n_PetscSFDataLink {
13: MPI_Datatype unit;
14: MPI_Datatype *mine;
15: MPI_Datatype *remote;
16: PetscSFDataLink next;
17: };
19: struct _n_PetscSFWinLink {
20: PetscBool inuse;
21: size_t bytes;
22: void *addr;
23: MPI_Win win;
24: PetscBool epoch;
25: PetscSFWinLink next;
26: };
28: const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",0};
30: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPIU_Allreduce) */
31: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
32: {
35: if (*op == MPIU_SUM) *op = MPI_SUM;
36: else if (*op == MPIU_MAX) *op = MPI_MAX;
37: else if (*op == MPIU_MIN) *op = MPI_MIN;
38: return(0);
39: }
41: /*@C
42: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
44: Not Collective
46: Input Arguments:
47: + sf - star forest
48: - unit - data type for each node
50: Output Arguments:
51: + localtypes - types describing part of local leaf buffer referencing each remote rank
52: - remotetypes - types describing part of remote root buffer referenced for each remote rank
54: Level: developer
56: .seealso: PetscSFSetGraph(), PetscSFView()
57: @*/
58: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
59: {
60: PetscSF_Window *w = (PetscSF_Window*)sf->data;
61: PetscErrorCode ierr;
62: PetscSFDataLink link;
63: PetscInt i,nranks;
64: const PetscInt *roffset,*rmine,*rremote;
65: const PetscMPIInt *ranks;
68: /* Look for types in cache */
69: for (link=w->link; link; link=link->next) {
70: PetscBool match;
71: MPIPetsc_Type_compare(unit,link->unit,&match);
72: if (match) {
73: *localtypes = link->mine;
74: *remotetypes = link->remote;
75: return(0);
76: }
77: }
79: /* Create new composite types for each send rank */
80: PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
81: PetscNew(&link);
82: MPI_Type_dup(unit,&link->unit);
83: PetscMalloc2(nranks,&link->mine,nranks,&link->remote);
84: for (i=0; i<nranks; i++) {
85: PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
86: PetscMPIInt *rmine,*rremote;
87: #if !defined(PETSC_USE_64BIT_INDICES)
88: rmine = sf->rmine + sf->roffset[i];
89: rremote = sf->rremote + sf->roffset[i];
90: #else
91: PetscInt j;
92: PetscMalloc2(rcount,&rmine,rcount,&rremote);
93: for (j=0; j<rcount; j++) {
94: PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
95: PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
96: }
97: #endif
98: MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
99: MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
100: #if defined(PETSC_USE_64BIT_INDICES)
101: PetscFree2(rmine,rremote);
102: #endif
103: MPI_Type_commit(&link->mine[i]);
104: MPI_Type_commit(&link->remote[i]);
105: }
106: link->next = w->link;
107: w->link = link;
109: *localtypes = link->mine;
110: *remotetypes = link->remote;
111: return(0);
112: }
114: /*@C
115: PetscSFWindowSetSyncType - set synchrozitaion type for PetscSF communication
117: Logically Collective
119: Input Arguments:
120: + sf - star forest for communication
121: - sync - synchronization type
123: Options Database Key:
124: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
126: Level: advanced
128: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
129: @*/
130: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
131: {
137: PetscUseMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
138: return(0);
139: }
141: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
142: {
143: PetscSF_Window *w = (PetscSF_Window*)sf->data;
146: w->sync = sync;
147: return(0);
148: }
150: /*@C
151: PetscSFWindowGetSyncType - get synchrozitaion type for PetscSF communication
153: Logically Collective
155: Input Argument:
156: . sf - star forest for communication
158: Output Argument:
159: . sync - synchronization type
161: Level: advanced
163: .seealso: PetscSFGetFromOptions(), PetscSFWindowSetSyncType()
164: @*/
165: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
166: {
172: PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
173: return(0);
174: }
176: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
177: {
178: PetscSF_Window *w = (PetscSF_Window*)sf->data;
181: *sync = w->sync;
182: return(0);
183: }
185: /*@C
186: PetscSFGetWindow - Get a window for use with a given data type
188: Collective on PetscSF
190: Input Arguments:
191: + sf - star forest
192: . unit - data type
193: . array - array to be sent
194: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
195: . fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
196: . postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
197: - startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE
199: Output Arguments:
200: . win - window
202: Level: developer
204: Developer Notes:
205: This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
206: reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
207: whichever one is available, by copying the array into it if necessary.
209: .seealso: PetscSFGetRanks(), PetscSFWindowGetDataTypes()
210: @*/
211: static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
212: {
213: PetscSF_Window *w = (PetscSF_Window*)sf->data;
215: MPI_Aint lb,lb_true,bytes,bytes_true;
216: PetscSFWinLink link;
219: MPI_Type_get_extent(unit,&lb,&bytes);
220: MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
221: if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
222: if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
223: PetscNew(&link);
225: link->bytes = bytes;
226: link->addr = array;
228: MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);
230: link->epoch = epoch;
231: link->next = w->wins;
232: link->inuse = PETSC_TRUE;
233: w->wins = link;
234: *win = link->win;
236: if (epoch) {
237: switch (w->sync) {
238: case PETSCSF_WINDOW_SYNC_FENCE:
239: MPI_Win_fence(fenceassert,*win);
240: break;
241: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
242: break;
243: case PETSCSF_WINDOW_SYNC_ACTIVE: {
244: MPI_Group ingroup,outgroup;
245: PetscSFGetGroups(sf,&ingroup,&outgroup);
246: MPI_Win_post(ingroup,postassert,*win);
247: MPI_Win_start(outgroup,startassert,*win);
248: } break;
249: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
250: }
251: }
252: return(0);
253: }
255: /*@C
256: PetscSFFindWindow - Finds a window that is already in use
258: Not Collective
260: Input Arguments:
261: + sf - star forest
262: . unit - data type
263: - array - array with which the window is associated
265: Output Arguments:
266: . win - window
268: Level: developer
270: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
271: @*/
272: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
273: {
274: PetscSF_Window *w = (PetscSF_Window*)sf->data;
275: PetscSFWinLink link;
278: *win = MPI_WIN_NULL;
279: for (link=w->wins; link; link=link->next) {
280: if (array == link->addr) {
281: *win = link->win;
282: return(0);
283: }
284: }
285: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
286: return(0);
287: }
289: /*@C
290: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
292: Collective
294: Input Arguments:
295: + sf - star forest
296: . unit - data type
297: . array - array associated with window
298: . epoch - close an epoch, must match argument to PetscSFGetWindow()
299: - win - window
301: Level: developer
303: .seealso: PetscSFFindWindow()
304: @*/
305: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
306: {
307: PetscSF_Window *w = (PetscSF_Window*)sf->data;
309: PetscSFWinLink *p,link;
312: for (p=&w->wins; *p; p=&(*p)->next) {
313: link = *p;
314: if (*win == link->win) {
315: if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
316: if (epoch != link->epoch) {
317: if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
318: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
319: }
320: *p = link->next;
321: goto found;
322: }
323: }
324: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
326: found:
327: if (epoch) {
328: switch (w->sync) {
329: case PETSCSF_WINDOW_SYNC_FENCE:
330: MPI_Win_fence(fenceassert,*win);
331: break;
332: case PETSCSF_WINDOW_SYNC_LOCK:
333: break; /* handled outside */
334: case PETSCSF_WINDOW_SYNC_ACTIVE: {
335: MPI_Win_complete(*win);
336: MPI_Win_wait(*win);
337: } break;
338: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
339: }
340: }
342: MPI_Win_free(&link->win);
343: PetscFree(link);
344: *win = MPI_WIN_NULL;
345: return(0);
346: }
348: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
349: {
350: PetscSF_Window *w = (PetscSF_Window*)sf->data;
352: MPI_Group ingroup,outgroup;
355: PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);
356: switch (w->sync) {
357: case PETSCSF_WINDOW_SYNC_ACTIVE:
358: PetscSFGetGroups(sf,&ingroup,&outgroup);
359: default:
360: break;
361: }
362: return(0);
363: }
365: static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
366: {
367: PetscSF_Window *w = (PetscSF_Window*)sf->data;
371: PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");
372: PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
373: PetscOptionsTail();
374: return(0);
375: }
377: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
378: {
379: PetscSF_Window *w = (PetscSF_Window*)sf->data;
380: PetscErrorCode ierr;
381: PetscSFDataLink link,next;
382: PetscSFWinLink wlink,wnext;
383: PetscInt i;
386: for (link=w->link; link; link=next) {
387: next = link->next;
388: MPI_Type_free(&link->unit);
389: for (i=0; i<sf->nranks; i++) {
390: MPI_Type_free(&link->mine[i]);
391: MPI_Type_free(&link->remote[i]);
392: }
393: PetscFree2(link->mine,link->remote);
394: PetscFree(link);
395: }
396: w->link = NULL;
397: for (wlink=w->wins; wlink; wlink=wnext) {
398: wnext = wlink->next;
399: if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
400: MPI_Win_free(&wlink->win);
401: PetscFree(wlink);
402: }
403: w->wins = NULL;
404: return(0);
405: }
407: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
408: {
412: PetscSFReset_Window(sf);
413: PetscFree(sf->data);
414: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
415: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
416: return(0);
417: }
419: static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
420: {
421: PetscSF_Window *w = (PetscSF_Window*)sf->data;
423: PetscBool iascii;
426: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
427: if (iascii) {
428: PetscViewerASCIIPrintf(viewer," synchronization=%s sort=%s\n",PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
429: }
430: return(0);
431: }
433: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
434: {
435: PetscSF_Window *w = (PetscSF_Window*)sf->data;
436: PetscErrorCode ierr;
437: PetscSFWindowSyncType synctype;
440: synctype = w->sync;
441: if (!sf->setupcalled) {
442: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
443: synctype = PETSCSF_WINDOW_SYNC_LOCK;
444: }
445: PetscSFWindowSetSyncType(newsf,synctype);
446: return(0);
447: }
449: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
450: {
451: PetscSF_Window *w = (PetscSF_Window*)sf->data;
452: PetscErrorCode ierr;
453: PetscInt i,nranks;
454: const PetscMPIInt *ranks;
455: const MPI_Datatype *mine,*remote;
456: MPI_Win win;
459: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
460: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
461: PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
462: for (i=0; i<nranks; i++) {
463: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
464: MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);
465: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
466: }
467: return(0);
468: }
470: PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
471: {
473: MPI_Win win;
476: PetscSFFindWindow(sf,unit,rootdata,&win);
477: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
478: return(0);
479: }
481: PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
482: {
483: PetscSF_Window *w = (PetscSF_Window*)sf->data;
484: PetscErrorCode ierr;
485: PetscInt i,nranks;
486: const PetscMPIInt *ranks;
487: const MPI_Datatype *mine,*remote;
488: MPI_Win win;
491: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
492: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
493: PetscSFWindowOpTranslate(&op);
494: PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);
495: for (i=0; i<nranks; i++) {
496: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
497: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
498: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
499: }
500: return(0);
501: }
503: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
504: {
505: PetscSF_Window *w = (PetscSF_Window*)sf->data;
507: MPI_Win win;
510: if (!w->wins) return(0);
511: PetscSFFindWindow(sf,unit,rootdata,&win);
512: MPI_Win_fence(MPI_MODE_NOSUCCEED,win);
513: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);
514: return(0);
515: }
516: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
517: {
518: PetscErrorCode ierr;
519: PetscInt i,nranks;
520: const PetscMPIInt *ranks;
521: const MPI_Datatype *mine,*remote;
522: MPI_Win win;
525: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
526: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
527: PetscSFWindowOpTranslate(&op);
528: PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);
529: for (i=0; i<sf->nranks; i++) {
530: MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);
531: MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);
532: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
533: MPI_Win_unlock(ranks[i],win);
534: }
535: return(0);
536: }
538: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
539: {
541: MPI_Win win;
544: PetscSFFindWindow(sf,unit,rootdata,&win);
545: /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
546: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);
547: return(0);
548: }
550: PETSC_EXTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
551: {
552: PetscSF_Window *w = (PetscSF_Window*)sf->data;
556: sf->ops->SetUp = PetscSFSetUp_Window;
557: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
558: sf->ops->Reset = PetscSFReset_Window;
559: sf->ops->Destroy = PetscSFDestroy_Window;
560: sf->ops->View = PetscSFView_Window;
561: sf->ops->Duplicate = PetscSFDuplicate_Window;
562: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
563: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
564: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
565: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
566: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
567: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
569: PetscNewLog(sf,&w);
570: sf->data = (void*)w;
571: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
573: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
574: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
576: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
577: {
578: PetscBool ackbug = PETSC_FALSE;
579: PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
580: if (ackbug) {
581: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
582: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
583: }
584: #endif
585: return(0);
586: }