Actual source code: sfwindow.c
petsc-3.4.5 2014-06-29
1: #include <petsc-private/sfimpl.h> /*I "petscsf.h" I*/
3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4: typedef struct _n_PetscSFWinLink *PetscSFWinLink;
6: typedef struct {
7: PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
8: PetscSFDataLink link; /* List of MPI data types and windows, lazily constructed for each data type */
9: PetscSFWinLink wins; /* List of active windows */
10: } PetscSF_Window;
12: struct _n_PetscSFDataLink {
13: MPI_Datatype unit;
14: MPI_Datatype *mine;
15: MPI_Datatype *remote;
16: PetscSFDataLink next;
17: };
19: struct _n_PetscSFWinLink {
20: PetscBool inuse;
21: size_t bytes;
22: void *addr;
23: MPI_Win win;
24: PetscBool epoch;
25: PetscSFWinLink next;
26: };
28: const char *const PetscSFWindowSyncTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFWindowSyncType","PETSCSF_WINDOW_SYNC_",0};
32: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
33: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
34: {
37: if (*op == MPIU_SUM) *op = MPI_SUM;
38: else if (*op == MPIU_MAX) *op = MPI_MAX;
39: else if (*op == MPIU_MIN) *op = MPI_MIN;
40: return(0);
41: }
45: /*@C
46: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
48: Not Collective
50: Input Arguments:
51: + sf - star forest
52: - unit - data type for each node
54: Output Arguments:
55: + localtypes - types describing part of local leaf buffer referencing each remote rank
56: - remotetypes - types describing part of remote root buffer referenced for each remote rank
58: Level: developer
60: .seealso: PetscSFSetGraph(), PetscSFView()
61: @*/
62: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
63: {
64: PetscSF_Window *w = (PetscSF_Window*)sf->data;
65: PetscErrorCode ierr;
66: PetscSFDataLink link;
67: PetscInt i,nranks;
68: const PetscInt *roffset,*rmine,*rremote;
69: const PetscMPIInt *ranks;
72: /* Look for types in cache */
73: for (link=w->link; link; link=link->next) {
74: PetscBool match;
75: MPIPetsc_Type_compare(unit,link->unit,&match);
76: if (match) {
77: *localtypes = link->mine;
78: *remotetypes = link->remote;
79: return(0);
80: }
81: }
83: /* Create new composite types for each send rank */
84: PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
85: PetscMalloc(sizeof(*link),&link);
86: MPI_Type_dup(unit,&link->unit);
87: PetscMalloc2(nranks,MPI_Datatype,&link->mine,nranks,MPI_Datatype,&link->remote);
88: for (i=0; i<nranks; i++) {
89: PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
90: PetscMPIInt *rmine,*rremote;
91: #if !defined(PETSC_USE_64BIT_INDICES)
92: rmine = sf->rmine + sf->roffset[i];
93: rremote = sf->rremote + sf->roffset[i];
94: #else
95: PetscInt j;
96: PetscMalloc2(rcount,PetscMPIInt,&rmine,rcount,PetscMPIInt,&rremote);
97: for (j=0; j<rcount; j++) {
98: PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
99: PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
100: }
101: #endif
102: MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
103: MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
104: #if defined(PETSC_USE_64BIT_INDICES)
105: PetscFree2(rmine,rremote);
106: #endif
107: MPI_Type_commit(&link->mine[i]);
108: MPI_Type_commit(&link->remote[i]);
109: }
110: link->next = w->link;
111: w->link = link;
113: *localtypes = link->mine;
114: *remotetypes = link->remote;
115: return(0);
116: }
120: /*@C
121: PetscSFWindowSetSyncType - set synchrozitaion type for PetscSF communication
123: Logically Collective
125: Input Arguments:
126: + sf - star forest for communication
127: - sync - synchronization type
129: Options Database Key:
130: . -sf_synchronization <sync> - sets the synchronization type
132: Level: advanced
134: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
135: @*/
136: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
137: {
143: PetscUseMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
144: return(0);
145: }
149: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
150: {
151: PetscSF_Window *w = (PetscSF_Window*)sf->data;
154: w->sync = sync;
155: return(0);
156: }
160: /*@C
161: PetscSFWindowGetSyncType - get synchrozitaion type for PetscSF communication
163: Logically Collective
165: Input Argument:
166: . sf - star forest for communication
168: Output Argument:
169: . sync - synchronization type
171: Level: advanced
173: .seealso: PetscSFGetFromOptions(), PetscSFWindowSetSyncType()
174: @*/
175: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
176: {
182: PetscTryMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
183: return(0);
184: }
188: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
189: {
190: PetscSF_Window *w = (PetscSF_Window*)sf->data;
193: *sync = w->sync;
194: return(0);
195: }
199: /*@C
200: PetscSFGetWindow - Get a window for use with a given data type
202: Collective on PetscSF
204: Input Arguments:
205: + sf - star forest
206: . unit - data type
207: . array - array to be sent
208: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
209: . fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_WINDOW_SYNC_FENCE
210: . postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_WINDOW_SYNC_ACTIVE
211: - startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_WINDOW_SYNC_ACTIVE
213: Output Arguments:
214: . win - window
216: Level: developer
218: Developer Notes:
219: This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
220: reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
221: whichever one is available, by copying the array into it if necessary.
223: .seealso: PetscSFGetRanks(), PetscSFWindowGetDataTypes()
224: @*/
225: static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
226: {
227: PetscSF_Window *w = (PetscSF_Window*)sf->data;
229: MPI_Aint lb,lb_true,bytes,bytes_true;
230: PetscSFWinLink link;
233: MPI_Type_get_extent(unit,&lb,&bytes);
234: MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
235: if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
236: if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
237: PetscMalloc(sizeof(*link),&link);
239: link->bytes = bytes;
240: link->addr = array;
242: MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,PetscObjectComm((PetscObject)sf),&link->win);
244: link->epoch = epoch;
245: link->next = w->wins;
246: link->inuse = PETSC_TRUE;
247: w->wins = link;
248: *win = link->win;
250: if (epoch) {
251: switch (w->sync) {
252: case PETSCSF_WINDOW_SYNC_FENCE:
253: MPI_Win_fence(fenceassert,*win);
254: break;
255: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
256: break;
257: case PETSCSF_WINDOW_SYNC_ACTIVE: {
258: MPI_Group ingroup,outgroup;
259: PetscSFGetGroups(sf,&ingroup,&outgroup);
260: MPI_Win_post(ingroup,postassert,*win);
261: MPI_Win_start(outgroup,startassert,*win);
262: } break;
263: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
264: }
265: }
266: return(0);
267: }
271: /*@C
272: PetscSFFindWindow - Finds a window that is already in use
274: Not Collective
276: Input Arguments:
277: + sf - star forest
278: . unit - data type
279: - array - array with which the window is associated
281: Output Arguments:
282: . win - window
284: Level: developer
286: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
287: @*/
288: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
289: {
290: PetscSF_Window *w = (PetscSF_Window*)sf->data;
291: PetscSFWinLink link;
294: for (link=w->wins; link; link=link->next) {
295: if (array == link->addr) {
296: *win = link->win;
297: return(0);
298: }
299: }
300: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
301: return(0);
302: }
306: /*@C
307: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
309: Collective
311: Input Arguments:
312: + sf - star forest
313: . unit - data type
314: . array - array associated with window
315: . epoch - close an epoch, must match argument to PetscSFGetWindow()
316: - win - window
318: Level: developer
320: .seealso: PetscSFFindWindow()
321: @*/
322: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
323: {
324: PetscSF_Window *w = (PetscSF_Window*)sf->data;
326: PetscSFWinLink *p,link;
329: for (p=&w->wins; *p; p=&(*p)->next) {
330: link = *p;
331: if (*win == link->win) {
332: if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
333: if (epoch != link->epoch) {
334: if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
335: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
336: }
337: *p = link->next;
338: goto found;
339: }
340: }
341: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
343: found:
344: if (epoch) {
345: switch (w->sync) {
346: case PETSCSF_WINDOW_SYNC_FENCE:
347: MPI_Win_fence(fenceassert,*win);
348: break;
349: case PETSCSF_WINDOW_SYNC_LOCK:
350: break; /* handled outside */
351: case PETSCSF_WINDOW_SYNC_ACTIVE: {
352: MPI_Win_complete(*win);
353: MPI_Win_wait(*win);
354: } break;
355: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
356: }
357: }
359: MPI_Win_free(&link->win);
360: PetscFree(link);
361: *win = MPI_WIN_NULL;
362: return(0);
363: }
367: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
368: {
369: PetscSF_Window *w = (PetscSF_Window*)sf->data;
371: MPI_Group ingroup,outgroup;
374: switch (w->sync) {
375: case PETSCSF_WINDOW_SYNC_ACTIVE:
376: PetscSFGetGroups(sf,&ingroup,&outgroup);
377: default:
378: break;
379: }
380: return(0);
381: }
385: static PetscErrorCode PetscSFSetFromOptions_Window(PetscSF sf)
386: {
387: PetscSF_Window *w = (PetscSF_Window*)sf->data;
391: PetscOptionsHead("PetscSF Window options");
392: PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
393: PetscOptionsTail();
394: return(0);
395: }
399: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
400: {
401: PetscSF_Window *w = (PetscSF_Window*)sf->data;
402: PetscErrorCode ierr;
403: PetscSFDataLink link,next;
404: PetscSFWinLink wlink,wnext;
405: PetscInt i;
408: for (link=w->link; link; link=next) {
409: next = link->next;
410: MPI_Type_free(&link->unit);
411: for (i=0; i<sf->nranks; i++) {
412: MPI_Type_free(&link->mine[i]);
413: MPI_Type_free(&link->remote[i]);
414: }
415: PetscFree2(link->mine,link->remote);
416: PetscFree(link);
417: }
418: w->link = NULL;
419: for (wlink=w->wins; wlink; wlink=wnext) {
420: wnext = wlink->next;
421: if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
422: MPI_Win_free(&wlink->win);
423: PetscFree(wlink);
424: }
425: w->wins = NULL;
426: return(0);
427: }
431: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
432: {
436: PetscSFReset_Window(sf);
437: PetscFree(sf->data);
438: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
439: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
440: return(0);
441: }
445: static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
446: {
447: PetscSF_Window *w = (PetscSF_Window*)sf->data;
449: PetscBool iascii;
452: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
453: if (iascii) {
454: PetscViewerASCIIPrintf(viewer," synchronization=%s sort=%s\n",PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
455: }
456: return(0);
457: }
461: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
462: {
463: PetscSF_Window *w = (PetscSF_Window*)sf->data;
464: PetscErrorCode ierr;
465: PetscSFWindowSyncType synctype;
468: synctype = w->sync;
469: if (!sf->setupcalled) {
470: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
471: synctype = PETSCSF_WINDOW_SYNC_LOCK;
472: }
473: PetscSFWindowSetSyncType(newsf,synctype);
474: return(0);
475: }
479: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
480: {
481: PetscSF_Window *w = (PetscSF_Window*)sf->data;
482: PetscErrorCode ierr;
483: PetscInt i,nranks;
484: const PetscMPIInt *ranks;
485: const MPI_Datatype *mine,*remote;
486: MPI_Win win;
489: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
490: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
491: PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
492: for (i=0; i<nranks; i++) {
493: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
494: MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);
495: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
496: }
497: return(0);
498: }
502: PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
503: {
505: MPI_Win win;
508: PetscSFFindWindow(sf,unit,rootdata,&win);
509: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
510: return(0);
511: }
515: PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
516: {
517: PetscSF_Window *w = (PetscSF_Window*)sf->data;
518: PetscErrorCode ierr;
519: PetscInt i,nranks;
520: const PetscMPIInt *ranks;
521: const MPI_Datatype *mine,*remote;
522: MPI_Win win;
525: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
526: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
527: PetscSFWindowOpTranslate(&op);
528: PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);
529: for (i=0; i<nranks; i++) {
530: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
531: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
532: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
533: }
534: return(0);
535: }
539: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
540: {
541: PetscSF_Window *w = (PetscSF_Window*)sf->data;
543: MPI_Win win;
546: if (!w->wins) return(0);
547: PetscSFFindWindow(sf,unit,rootdata,&win);
548: MPI_Win_fence(MPI_MODE_NOSUCCEED,win);
549: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);
550: return(0);
551: }
554: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
555: {
556: PetscErrorCode ierr;
557: PetscInt i,nranks;
558: const PetscMPIInt *ranks;
559: const MPI_Datatype *mine,*remote;
560: MPI_Win win;
563: PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
564: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
565: PetscSFWindowOpTranslate(&op);
566: PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);
567: for (i=0; i<sf->nranks; i++) {
568: MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);
569: MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);
570: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
571: MPI_Win_unlock(ranks[i],win);
572: }
573: return(0);
574: }
578: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
579: {
581: MPI_Win win;
584: PetscSFFindWindow(sf,unit,rootdata,&win);
585: /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
586: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);
587: return(0);
588: }
592: PETSC_EXTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
593: {
594: PetscSF_Window *w = (PetscSF_Window*)sf->data;
598: sf->ops->SetUp = PetscSFSetUp_Window;
599: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
600: sf->ops->Reset = PetscSFReset_Window;
601: sf->ops->Destroy = PetscSFDestroy_Window;
602: sf->ops->View = PetscSFView_Window;
603: sf->ops->Duplicate = PetscSFDuplicate_Window;
604: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
605: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
606: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
607: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
608: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
609: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
611: PetscNewLog(sf,PetscSF_Window,&w);
612: sf->data = (void*)w;
613: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
615: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
616: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
618: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
619: {
620: PetscBool ackbug = PETSC_FALSE;
621: PetscOptionsGetBool(NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
622: if (ackbug) {
623: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.");
624: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
625: }
626: #endif
627: return(0);
628: }