Actual source code: sfwindow.c
petsc-3.6.1 2015-08-06
1: #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
typedef struct _n_PetscSFDataLink *PetscSFDataLink;
typedef struct _n_PetscSFWinLink *PetscSFWinLink;

/* Implementation-private data for the "window" (MPI one-sided) PetscSF type */
typedef struct {
  PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
  PetscSFDataLink link;       /* List of MPI data types and windows, lazily constructed for each data type */
  PetscSFWinLink wins;        /* List of active windows */
} PetscSF_Window;

/* One cache entry per unit datatype: the composite indexed types built from it
   for each neighbor rank (constructed in PetscSFWindowGetDataTypes()) */
struct _n_PetscSFDataLink {
  MPI_Datatype unit;    /* cache key: the unit type these composite types were built from */
  MPI_Datatype *mine;   /* per-rank types addressing this process's leaf buffer */
  MPI_Datatype *remote; /* per-rank types addressing the remote root buffer */
  PetscSFDataLink next; /* singly-linked cache list */
};

/* One entry per MPI window handed out by PetscSFGetWindow() */
struct _n_PetscSFWinLink {
  PetscBool inuse;      /* window currently checked out by a communication call */
  size_t bytes;         /* extent of one unit; used as the window displacement unit */
  void *addr;           /* user array exposed by the window; lookup key for PetscSFFindWindow() */
  MPI_Win win;
  PetscBool epoch;      /* PETSC_TRUE if an access/exposure epoch was opened on this window */
  PetscSFWinLink next;  /* singly-linked list of outstanding windows */
};
/* String names for PetscSFWindowSyncType; layout follows the PETSc enum-name convention
   (values, type name, prefix, NULL terminator) consumed by PetscOptionsEnum() */
const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",0};
32: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
33: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
34: {
37: if (*op == MPIU_SUM) *op = MPI_SUM;
38: else if (*op == MPIU_MAX) *op = MPI_MAX;
39: else if (*op == MPIU_MIN) *op = MPI_MIN;
40: return(0);
41: }
45: /*@C
46: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
48: Not Collective
50: Input Arguments:
51: + sf - star forest
52: - unit - data type for each node
54: Output Arguments:
55: + localtypes - types describing part of local leaf buffer referencing each remote rank
56: - remotetypes - types describing part of remote root buffer referenced for each remote rank
58: Level: developer
60: .seealso: PetscSFSetGraph(), PetscSFView()
61: @*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFDataLink link;
  PetscInt i,nranks;
  const PetscInt *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  /* Look for types in cache: one PetscSFDataLink per previously-seen unit type */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    MPIPetsc_Type_compare(unit,link->unit,&match);
    if (match) {
      *localtypes = link->mine;
      *remotetypes = link->remote;
      return(0);
    }
  }

  /* Create new composite types for each send rank */
  PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
  PetscMalloc(sizeof(*link),&link);
  MPI_Type_dup(unit,&link->unit);
  PetscMalloc2(nranks,&link->mine,nranks,&link->remote);
  for (i=0; i<nranks; i++) {
    PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i]; /* number of leaves referencing rank i */
    /* NOTE(review): these locals shadow the const PetscInt arrays obtained above */
    PetscMPIInt *rmine,*rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    /* PetscInt and PetscMPIInt coincide, so point directly into the SF's index arrays */
    rmine = sf->rmine + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    /* 64-bit PetscInt must be narrowed to PetscMPIInt for the MPI type constructor */
    PetscInt j;
    PetscMalloc2(rcount,&rmine,rcount,&rremote);
    for (j=0; j<rcount; j++) {
      PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
      PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
    }
#endif
    /* One indexed-block type per rank: mine[i] selects the local leaf entries,
       remote[i] the matching entries in the remote root buffer */
    MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
    MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
#if defined(PETSC_USE_64BIT_INDICES)
    PetscFree2(rmine,rremote);
#endif
    MPI_Type_commit(&link->mine[i]);
    MPI_Type_commit(&link->remote[i]);
  }
  /* Prepend the new entry to the cache list */
  link->next = w->link;
  w->link = link;

  *localtypes = link->mine;
  *remotetypes = link->remote;
  return(0);
}
120: /*@C
121: PetscSFWindowSetSyncType - set synchronization type for PetscSF communication
123: Logically Collective
125: Input Arguments:
126: + sf - star forest for communication
127: - sync - synchronization type
129: Options Database Key:
130: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
132: Level: advanced
134: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
135: @*/
PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
{
  /* Dispatch to the type-specific implementation; PetscUseMethod errors if sf is not a window SF */
  PetscUseMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
  return(0);
}
/* Window-type implementation behind PetscSFWindowSetSyncType(): store the choice */
static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;

  w->sync = sync;
  return(0);
}
160: /*@C
161: PetscSFWindowGetSyncType - get synchronization type for PetscSF communication
163: Logically Collective
165: Input Argument:
166: . sf - star forest for communication
168: Output Argument:
169: . sync - synchronization type
171: Level: advanced
173: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
174: @*/
PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
{
  /* Query is optional: PetscTryMethod is a no-op on non-window SF types */
  PetscTryMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
  return(0);
}
/* Window-type implementation behind PetscSFWindowGetSyncType(): report the stored choice */
static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;

  *sync = w->sync;
  return(0);
}
199: /*@C
200: PetscSFGetWindow - Get a window for use with a given data type
202: Collective on PetscSF
204: Input Arguments:
205: + sf - star forest
206: . unit - data type
207: . array - array to be sent
208: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
209: . fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
210: . postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
211: - startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE
213: Output Arguments:
214: . win - window
216: Level: developer
218: Developer Notes:
219: This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
220: reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
221: whichever one is available, by copying the array into it if necessary.
223: .seealso: PetscSFGetRanks(), PetscSFWindowGetDataTypes()
224: @*/
static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  MPI_Aint lb,lb_true,bytes,bytes_true;
  PetscSFWinLink link;

  /* Only units with zero lower bound and unmodified extent are supported, so that
     byte offsets in the window are simply (index * extent) */
  MPI_Type_get_extent(unit,&lb,&bytes);
  MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
  if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
  if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
  PetscMalloc(sizeof(*link),&link);

  link->bytes = bytes;
  link->addr = array;

  /* Expose the root array; displacement unit is one MPI unit so targets address by index */
  MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);

  /* Record the new window at the head of the active list */
  link->epoch = epoch;
  link->next = w->wins;
  link->inuse = PETSC_TRUE;
  w->wins = link;
  *win = link->win;

  if (epoch) {
    switch (w->sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert,*win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Group ingroup,outgroup;
      /* post exposure to ranks that access us, start access to ranks we target */
      PetscSFGetGroups(sf,&ingroup,&outgroup);
      MPI_Win_post(ingroup,postassert,*win);
      MPI_Win_start(outgroup,startassert,*win);
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  return(0);
}
271: /*@C
272: PetscSFFindWindow - Finds a window that is already in use
274: Not Collective
276: Input Arguments:
277: + sf - star forest
278: . unit - data type
279: - array - array with which the window is associated
281: Output Arguments:
282: . win - window
284: Level: developer
286: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
287: @*/
288: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
289: {
290: PetscSF_Window *w = (PetscSF_Window*)sf->data;
291: PetscSFWinLink link;
294: *win = MPI_WIN_NULL;
295: for (link=w->wins; link; link=link->next) {
296: if (array == link->addr) {
297: *win = link->win;
298: return(0);
299: }
300: }
301: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
302: return(0);
303: }
307: /*@C
308: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
310: Collective
312: Input Arguments:
313: + sf - star forest
314: . unit - data type
315: . array - array associated with window
316: . epoch - close an epoch, must match argument to PetscSFGetWindow()
317: . fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
318: - win - window
319: Level: developer
321: .seealso: PetscSFFindWindow()
322: @*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscSFWinLink *p,link;

  /* Walk the active list with a pointer-to-pointer so the match can be unlinked in place */
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      *p = link->next; /* unlink from the active list */
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  if (epoch) {
    /* Close the epoch in the manner matching how PetscSFGetWindow() opened it */
    switch (w->sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      MPI_Win_fence(fenceassert,*win);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK:
      break; /* handled outside */
    case PETSCSF_WINDOW_SYNC_ACTIVE: {
      MPI_Win_complete(*win);
      MPI_Win_wait(*win);
    } break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }

  MPI_Win_free(&link->win);
  PetscFree(link);
  *win = MPI_WIN_NULL;
  return(0);
}
/* Setup for the window SF type: for ACTIVE synchronization, build the in/out
   process groups eagerly so later epoch open/close calls need not create them */
static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  MPI_Group ingroup,outgroup;

  switch (w->sync) {
  case PETSCSF_WINDOW_SYNC_ACTIVE:
    PetscSFGetGroups(sf,&ingroup,&outgroup);
    /* fallthrough */
  default:
    break;
  }
  return(0);
}
/* Process -sf_window_sync from the options database, writing directly into w->sync */
static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptions *PetscOptionsObject,PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;

  PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");
  PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
  PetscOptionsTail();
  return(0);
}
/* Free all cached composite datatypes and all windows; errors if any window is
   still checked out (a communication Begin without its matching End) */
static PetscErrorCode PetscSFReset_Window(PetscSF sf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFDataLink link,next;
  PetscSFWinLink wlink,wnext;
  PetscInt i;

  /* Free the datatype cache: per-rank composite types, the duplicated unit, the links */
  for (link=w->link; link; link=next) {
    next = link->next;
    MPI_Type_free(&link->unit);
    for (i=0; i<sf->nranks; i++) {
      MPI_Type_free(&link->mine[i]);
      MPI_Type_free(&link->remote[i]);
    }
    PetscFree2(link->mine,link->remote);
    PetscFree(link);
  }
  w->link = NULL;
  /* Free outstanding windows; an inuse window means unbalanced Begin/End */
  for (wlink=w->wins; wlink; wlink=wnext) {
    wnext = wlink->next;
    if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
    MPI_Win_free(&wlink->win);
    PetscFree(wlink);
  }
  w->wins = NULL;
  return(0);
}
/* Destroy the window SF: release cached state first, then the implementation struct,
   then detach the composed methods so they cannot be invoked on a dead object */
static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
{
  PetscSFReset_Window(sf);
  PetscFree(sf->data);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
  PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
  return(0);
}
/* View the window SF: prints the synchronization mode and rank ordering (ASCII viewers only) */
static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscBool iascii;

  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  synchronization=%s sort=%s\n",PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
  }
  return(0);
}
/* Duplicate the window SF: copy the synchronization type to the new SF */
static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFWindowSyncType synctype;

  synctype = w->sync;
  if (!sf->setupcalled) {
    /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
    synctype = PETSCSF_WINDOW_SYNC_LOCK;
  }
  PetscSFWindowSetSyncType(newsf,synctype);
  return(0);
}
/* Begin a broadcast root->leaves: expose rootdata in a window and MPI_Get the
   leaf values from each neighbor rank; completion happens in PetscSFBcastEnd_Window() */
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Datatype *mine,*remote;
  MPI_Win win;

  PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  /* rootdata is only read by remote MPI_Get, hence the NOPUT/NOPRECEDE asserts */
  PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
  for (i=0; i<nranks; i++) {
    /* LOCK sync brackets each one-sided call in its own shared-lock epoch */
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
    MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
  }
  return(0);
}
/* Complete a broadcast started by PetscSFBcastBegin_Window(): closing the epoch
   in PetscSFRestoreWindow() completes all outstanding MPI_Get operations */
PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  MPI_Win win;

  PetscSFFindWindow(sf,unit,rootdata,&win);
  PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
  return(0);
}
/* Begin a reduction leaves->roots: expose rootdata in a window and MPI_Accumulate
   leaf contributions into each neighbor rank with the (translated) reduction op */
PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscInt i,nranks;
  const PetscMPIInt *ranks;
  const MPI_Datatype *mine,*remote;
  MPI_Win win;

  PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
  PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
  /* Map MPIU_* reduction ops to MPI built-ins so they work inside MPI_Accumulate */
  PetscSFWindowOpTranslate(&op);
  PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);
  for (i=0; i<nranks; i++) {
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
    MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
  }
  return(0);
}
/* Complete a reduction started by PetscSFReduceBegin_Window() */
static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op win;

  /* Nothing outstanding (e.g. Begin was never called on this data); nothing to complete */
  if (!w->wins) return(0);
  PetscSFFindWindow(sf,unit,rootdata,&win);
  /* NOTE(review): this fence precedes the epoch-closing fence inside PetscSFRestoreWindow()
     and runs regardless of w->sync -- appears intentional upstream; confirm */
  MPI_Win_fence(MPI_MODE_NOSUCCEED,win);
  PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);
  return(0);
}
555: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
556: {
557: PetscErrorCode ierr;
558: PetscInt i,nranks;
559: const PetscMPIInt *ranks;
560: const MPI_Datatype *mine,*remote;
561: MPI_Win win;
564: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
565: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
566: PetscSFWindowOpTranslate(&op);
567: PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);
568: for (i=0; i<sf->nranks; i++) {
569: MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);
570: MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);
571: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
572: MPI_Win_unlock(ranks[i],win);
573: }
574: return(0);
575: }
/* Complete a fetch-and-op: only releases the window (no epoch was opened) */
static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  MPI_Win win;

  PetscSFFindWindow(sf,unit,rootdata,&win);
  /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
  PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);
  return(0);
}
593: PETSC_EXTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
594: {
595: PetscSF_Window *w = (PetscSF_Window*)sf->data;
599: sf->ops->SetUp = PetscSFSetUp_Window;
600: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
601: sf->ops->Reset = PetscSFReset_Window;
602: sf->ops->Destroy = PetscSFDestroy_Window;
603: sf->ops->View = PetscSFView_Window;
604: sf->ops->Duplicate = PetscSFDuplicate_Window;
605: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
606: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
607: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
608: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
609: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
610: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
612: PetscNewLog(sf,&w);
613: sf->data = (void*)w;
614: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
616: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
617: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
619: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
620: {
621: PetscBool ackbug = PETSC_FALSE;
622: PetscOptionsGetBool(NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
623: if (ackbug) {
624: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
625: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
626: }
627: #endif
628: return(0);
629: }