Actual source code: sf.c
petsc-3.3-p7 2013-05-11
1: #include <petsc-private/sfimpl.h>
2: #include <petscctable.h>
4: #if defined(PETSC_USE_DEBUG)
5: # define PetscSFCheckGraphSet(sf,arg) do { \
6: if (PetscUnlikely(!(sf)->graphset)) \
7: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
8: } while (0)
9: #else
10: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
11: #endif
13: const char *const PetscSFSynchronizationTypes[] = {"FENCE","LOCK","ACTIVE","PetscSFSynchronizationType","PETSCSF_SYNCHRONIZATION_",0};
15: #if !defined(PETSC_HAVE_MPI_WIN_CREATE)
16: #define MPI_WIN_NULL ((MPI_Win)0)
17: #define MPI_REPLACE 0
18: #define MPI_Win_create(base,size,disp_unit,info,comm,win) 1;SETERRQ(comm,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
19: #define MPI_Win_free(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
20: #define MPI_Win_start(group,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
21: #define MPI_Win_post(group,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
22: #define MPI_Win_complete(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
23: #define MPI_Win_wait(win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
24: #define MPI_Win_lock(lock_type,rank,assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
25: #define MPI_Win_unlock(rank,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
26: #define MPI_Win_fence(assert,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
27: #define MPI_Get(origin_addr,origin_count,origin_datatype,target_rank,target_displ,target_count,target_datatype,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
28: #define MPI_Accumulate(origin_addr,origin_count,origin_datatype,target_rank,target_displ,target_count,target_datatype,op,win) 1;SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
29: /* Independent of MPI_Win, but also not in MPI-1 */
30: #define MPI_Type_get_envelope(datatype,num_ints,num_addrs,num_dtypes,combiner) (*(num_ints)=0,*(num_addrs)=0,*(num_dtypes)=0,*(combiner)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
31: #define MPI_Type_get_contents(datatype,num_ints,num_addrs,num_dtypes,ints,addrs,dtypes) (*(ints)=0,*(addrs)=0,*(dtypes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
32: #define MPI_Type_dup(datatype,newtype) (*(newtype)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
33: #define MPI_Type_create_indexed_block(count,blocklength,displs,oldtype,newtype) (*(newtype)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
34: #define MPI_Type_get_true_extent(type,lb,bytes) (*(lb)=0,*(bytes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
35: #ifndef PETSC_HAVE_MPIUNI
36: #define MPI_Type_get_extent(type,lb,bytes) (*(lb)=0,*(bytes)=0,1);SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"Need an MPI-2 implementation")
37: #endif
38: #define MPI_COMBINER_DUP 0
39: #define MPI_MODE_NOPUT 0
40: #define MPI_MODE_NOPRECEDE 0
41: #define MPI_MODE_NOSUCCEED 0
42: #define MPI_MODE_NOSTORE 0
43: #endif
47: /*@C
48: PetscSFCreate - create a star forest communication context
50: Not Collective
52: Input Arguments:
53: . comm - communicator on which the star forest will operate
55: Output Arguments:
56: . sf - new star forest context
58: Level: intermediate
60: .seealso: PetscSFSetGraph(), PetscSFDestroy()
61: @*/
62: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
63: {
65: PetscSF b;
69: #ifndef PETSC_USE_DYNAMIC_LIBRARIES
70: PetscSFInitializePackage(PETSC_NULL);
71: #endif
73: PetscHeaderCreate(b,_p_PetscSF,struct _PetscSFOps,PETSCSF_CLASSID,-1,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
74: b->nroots = -1;
75: b->nleaves = -1;
76: b->nranks = -1;
77: b->sync = PETSCSF_SYNCHRONIZATION_FENCE;
78: b->rankorder = PETSC_TRUE;
79: b->ingroup = MPI_GROUP_NULL;
80: b->outgroup = MPI_GROUP_NULL;
81: b->graphset = PETSC_FALSE;
82: *sf = b;
83: return(0);
84: }
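/*
   A minimal lifecycle sketch (hypothetical variable names, error checking omitted):
   a star forest is created on a communicator, configured, given a graph with
   PetscSFSetGraph(), used for communication, and finally destroyed.

     PetscSF     sf;
     PetscSFNode *remote;         // filled by the application
     PetscInt    nroots,nleaves;  // chosen by the application

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);
     PetscSFSetGraph(sf,nroots,nleaves,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);
     // ... PetscSFBcastBegin()/End(), PetscSFReduceBegin()/End(), ...
     PetscSFDestroy(&sf);
*/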
88: /*@C
89: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
91: Collective
93: Input Arguments:
94: . sf - star forest
96: Level: advanced
98: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
99: @*/
100: PetscErrorCode PetscSFReset(PetscSF sf)
101: {
103: PetscSFDataLink link,next;
104: PetscSFWinLink wlink,wnext;
105: PetscInt i;
109: sf->mine = PETSC_NULL;
110: PetscFree(sf->mine_alloc);
111: sf->remote = PETSC_NULL;
112: PetscFree(sf->remote_alloc);
113: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
114: PetscFree(sf->degree);
115: for (link=sf->link; link; link=next) {
116: next = link->next;
117: MPI_Type_free(&link->unit);
118: for (i=0; i<sf->nranks; i++) {
119: MPI_Type_free(&link->mine[i]);
120: MPI_Type_free(&link->remote[i]);
121: }
122: PetscFree2(link->mine,link->remote);
123: PetscFree(link);
124: }
125: sf->link = PETSC_NULL;
126: for (wlink=sf->wins; wlink; wlink=wnext) {
127: wnext = wlink->next;
128: if (wlink->inuse) SETERRQ1(((PetscObject)sf)->comm,PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
129: MPI_Win_free(&wlink->win);
130: PetscFree(wlink);
131: }
132: sf->wins = PETSC_NULL;
133: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
134: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
135: PetscSFDestroy(&sf->multi);
136: sf->graphset = PETSC_FALSE;
137: return(0);
138: }
142: /*@C
143: PetscSFDestroy - destroy star forest
145: Collective
147: Input Arguments:
148: . sf - address of star forest
150: Level: intermediate
152: .seealso: PetscSFCreate(), PetscSFReset()
153: @*/
154: PetscErrorCode PetscSFDestroy(PetscSF *sf)
155: {
159: if (!*sf) return(0);
161: if (--((PetscObject)(*sf))->refct > 0) {*sf = 0; return(0);}
162: PetscSFReset(*sf);
163: PetscHeaderDestroy(sf);
164: return(0);
165: }
169: /*@C
170: PetscSFSetFromOptions - set PetscSF options using the options database
172: Logically Collective
174: Input Arguments:
175: . sf - star forest
177: Options Database Keys:
178: . -sf_synchronization - synchronization type used by PetscSF
180: Level: intermediate
182:    .keywords: PetscSF, set, from, options, database
184: .seealso: PetscSFSetSynchronizationType()
185: @*/
186: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
187: {
192: PetscObjectOptionsBegin((PetscObject)sf);
193: PetscOptionsEnum("-sf_synchronization","synchronization type to use for PetscSF communication","PetscSFSetSynchronizationType",PetscSFSynchronizationTypes,(PetscEnum)sf->sync,(PetscEnum*)&sf->sync,PETSC_NULL);
194:     PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,PETSC_NULL);
195: PetscOptionsEnd();
196: return(0);
197: }
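/*
   The two options registered above can be set on the command line of any program
   that calls PetscSFSetFromOptions() (hypothetical executable name), e.g.:

     ./app -sf_synchronization lock -sf_rank_order false
*/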
201: /*@C
202:    PetscSFSetSynchronizationType - set synchronization type for PetscSF communication
204: Logically Collective
206: Input Arguments:
207: + sf - star forest for communication
208: - sync - synchronization type
210: Options Database Key:
211: . -sf_synchronization <sync> - sets the synchronization type
213: Level: intermediate
215: .seealso: PetscSFSetFromOptions()
216: @*/
217: PetscErrorCode PetscSFSetSynchronizationType(PetscSF sf,PetscSFSynchronizationType sync)
218: {
223: sf->sync = sync;
224: return(0);
225: }
229: /*@C
230: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
232: Logically Collective
234: Input Arguments:
235: + sf - star forest
236: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
238: Level: advanced
240: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
241: @*/
242: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
243: {
248: if (sf->multi) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
249: sf->rankorder = flg;
250: return(0);
251: }
255: /*@C
256: PetscSFSetGraph - Set a parallel star forest
258: Collective
260: Input Arguments:
261: + sf - star forest
262: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
263: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
264: . ilocal - locations of leaves in leafdata buffers, pass PETSC_NULL for contiguous storage
265: . localmode - copy mode for ilocal
266: . iremote - remote locations of root vertices for each leaf on the current process
267: - remotemode - copy mode for iremote
269: Level: intermediate
271: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
272: @*/
273: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
274: {
276: PetscTable table;
277: PetscTablePosition pos;
278: PetscMPIInt size;
279: PetscInt i,*rcount,*ranks;
285: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"roots %D, cannot be negative",nroots);
286: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
287: PetscSFReset(sf);
288: sf->nroots = nroots;
289: sf->nleaves = nleaves;
290: if (ilocal) {
291: switch (localmode) {
292: case PETSC_COPY_VALUES:
293: PetscMalloc(nleaves*sizeof(*sf->mine),&sf->mine_alloc);
294: sf->mine = sf->mine_alloc;
295: PetscMemcpy(sf->mine,ilocal,nleaves*sizeof(*sf->mine));
296: break;
297: case PETSC_OWN_POINTER:
298: sf->mine_alloc = (PetscInt*)ilocal;
299: sf->mine = sf->mine_alloc;
300: break;
301: case PETSC_USE_POINTER:
302: sf->mine = (PetscInt*)ilocal;
303: break;
304: default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
305: }
306: }
307: switch (remotemode) {
308: case PETSC_COPY_VALUES:
309: PetscMalloc(nleaves*sizeof(*sf->remote),&sf->remote_alloc);
310: sf->remote = sf->remote_alloc;
311: PetscMemcpy(sf->remote,iremote,nleaves*sizeof(*sf->remote));
312: break;
313: case PETSC_OWN_POINTER:
314: sf->remote_alloc = (PetscSFNode*)iremote;
315: sf->remote = sf->remote_alloc;
316: break;
317: case PETSC_USE_POINTER:
318: sf->remote = (PetscSFNode*)iremote;
319: break;
320: default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
321: }
323: MPI_Comm_size(((PetscObject)sf)->comm,&size);
324: PetscTableCreate(10,size,&table);
325: for (i=0; i<nleaves; i++) {
326:     /* Count leaves per remote rank; keys are stored 1-based because PetscTable does not accept key 0 */
327: PetscTableAdd(table,iremote[i].rank+1,1,ADD_VALUES);
328: }
329: PetscTableGetCount(table,&sf->nranks);
330: PetscMalloc4(sf->nranks,PetscInt,&sf->ranks,sf->nranks+1,PetscInt,&sf->roffset,nleaves,PetscMPIInt,&sf->rmine,nleaves,PetscMPIInt,&sf->rremote);
331: PetscMalloc2(sf->nranks,PetscInt,&rcount,sf->nranks,PetscInt,&ranks);
332: PetscTableGetHeadPosition(table,&pos);
333: for (i=0; i<sf->nranks; i++) {
334: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
335: ranks[i]--; /* Convert back to 0-based */
336: }
337: PetscTableDestroy(&table);
338: PetscSortIntWithArray(sf->nranks,ranks,rcount);
339: sf->roffset[0] = 0;
340: for (i=0; i<sf->nranks; i++) {
341: sf->ranks[i] = PetscMPIIntCast(ranks[i]);
342: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
343: rcount[i] = 0;
344: }
345: for (i=0; i<nleaves; i++) {
346: PetscInt lo,hi,irank;
347: /* Search for index of iremote[i].rank in sf->ranks */
348: lo = 0; hi = sf->nranks;
349: while (hi - lo > 1) {
350: PetscInt mid = lo + (hi - lo)/2;
351: if (iremote[i].rank < sf->ranks[mid]) hi = mid;
352: else lo = mid;
353: }
354: if (hi - lo == 1 && iremote[i].rank == sf->ranks[lo]) irank = lo;
355: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",iremote[i].rank);
356: sf->rmine[sf->roffset[irank] + rcount[irank]] = ilocal ? ilocal[i] : i;
357: sf->rremote[sf->roffset[irank] + rcount[irank]] = iremote[i].index;
358: rcount[irank]++;
359: }
360: PetscFree2(rcount,ranks);
361: #if !defined(PETSC_USE_64BIT_INDICES)
362: if (nroots == PETSC_DETERMINE) {
363: /* Jed, if you have a better way to do this, put it in */
364: PetscInt *numRankLeaves, *leafOff, *leafIndices, *numRankRoots, *rootOff, *rootIndices, maxRoots = 0;
366:     /* All-to-all to determine the number of leaf indices from each process (this could also be done using a Scan and asynchronous messages) */
367: PetscMalloc4(size,PetscInt,&numRankLeaves,size+1,PetscInt,&leafOff,size,PetscInt,&numRankRoots,size+1,PetscInt,&rootOff);
368: PetscMemzero(numRankLeaves, size * sizeof(PetscInt));
369: for(i = 0; i < nleaves; ++i) {
370: ++numRankLeaves[iremote[i].rank];
371: }
372: MPI_Alltoall(numRankLeaves, 1, MPIU_INT, numRankRoots, 1, MPIU_INT, ((PetscObject) sf)->comm);
373: /* Could set nroots to this maximum */
374: for(i = 0; i < size; ++i) {
375: maxRoots += numRankRoots[i];
376: }
377: /* Gather all indices */
378: PetscMalloc2(nleaves,PetscInt,&leafIndices,maxRoots,PetscInt,&rootIndices);
379: leafOff[0] = 0;
380: for(i = 0; i < size; ++i) {
381: leafOff[i+1] = leafOff[i] + numRankLeaves[i];
382: }
383: for(i = 0; i < nleaves; ++i) {
384: leafIndices[leafOff[iremote[i].rank]++] = iremote[i].index;
385: }
386: leafOff[0] = 0;
387: for(i = 0; i < size; ++i) {
388: leafOff[i+1] = leafOff[i] + numRankLeaves[i];
389: }
390: rootOff[0] = 0;
391: for(i = 0; i < size; ++i) {
392: rootOff[i+1] = rootOff[i] + numRankRoots[i];
393: }
394: MPI_Alltoallv(leafIndices, numRankLeaves, leafOff, MPIU_INT, rootIndices, numRankRoots, rootOff, MPIU_INT, ((PetscObject) sf)->comm);
395: /* Sort and reduce */
396: PetscSortRemoveDupsInt(&maxRoots, rootIndices);
397: PetscFree2(leafIndices,rootIndices);
398: PetscFree4(numRankLeaves,leafOff,numRankRoots,rootOff);
399: sf->nroots = maxRoots;
400: }
401: #endif
403: sf->graphset = PETSC_TRUE;
404: return(0);
405: }
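/*
   A small sketch of building a graph (hypothetical layout): every process owns two
   roots, and its two leaves reference roots 0 and 1 on the next process in a ring,
   with contiguous leaf storage (ilocal = PETSC_NULL).

     PetscMPIInt rank,size;
     PetscSFNode remote[2];
     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     remote[0].rank = (rank+1)%size; remote[0].index = 0;
     remote[1].rank = (rank+1)%size; remote[1].index = 1;
     PetscSFSetGraph(sf,2,2,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);
*/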
409: /*@C
410: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
412: Collective
414: Input Arguments:
415: . sf - star forest to invert
417: Output Arguments:
418: . isf - inverse of sf
420: Level: advanced
422: Notes:
423: All roots must have degree 1.
425: The local space may be a permutation, but cannot be sparse.
427: .seealso: PetscSFSetGraph()
428: @*/
429: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
430: {
432: PetscMPIInt rank;
433: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
434: const PetscInt *ilocal;
435: PetscSFNode *roots,*leaves;
438: MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
439: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,PETSC_NULL);
440: for (i=0,maxlocal=0; i<nleaves; i++) maxlocal = PetscMax(maxlocal,(ilocal?ilocal[i]:i)+1);
441: PetscMalloc2(nroots,PetscSFNode,&roots,nleaves,PetscSFNode,&leaves);
442: for (i=0; i<nleaves; i++) {
443: leaves[i].rank = rank;
444: leaves[i].index = i;
445: }
446:   for (i=0; i<nroots; i++) {
447: roots[i].rank = -1;
448: roots[i].index = -1;
449: }
450: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
451: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
453: /* Check whether our leaves are sparse */
454: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
455: if (count == nroots) newilocal = PETSC_NULL;
456: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
457: PetscMalloc(count*sizeof(PetscInt),&newilocal);
458: for (i=0,count=0; i<nroots; i++) {
459: if (roots[i].rank >= 0) {
460: newilocal[count] = i;
461: roots[count].rank = roots[i].rank;
462: roots[count].index = roots[i].index;
463: count++;
464: }
465: }
466: }
468: PetscSFCreate(((PetscObject)sf)->comm,isf);
469: PetscSFSetSynchronizationType(*isf,sf->sync);
470: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
471: PetscFree2(roots,leaves);
472: return(0);
473: }
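/*
   A sketch of inverting a star forest (assuming, per the notes above, that every
   root of sf has degree 1, e.g. sf encodes a permutation across processes):

     PetscSF inv;
     PetscSFCreateInverseSF(sf,&inv);
     // the roots of inv correspond to the leaves of sf and vice versa
     PetscSFDestroy(&inv);
*/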
477: /*@C
478: PetscSFGetGraph - Get the graph specifying a parallel star forest
480: Collective
482:    Input Argument:
483: .  sf - star forest
484:    Output Arguments:
485: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
486: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
487: .  ilocal - locations of leaves in leafdata buffers
488: -  iremote - remote locations of root vertices for each leaf on the current process
489: Level: intermediate
491: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
492: @*/
493: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
494: {
498: if (nroots) *nroots = sf->nroots;
499: if (nleaves) *nleaves = sf->nleaves;
500: if (ilocal) *ilocal = sf->mine;
501: if (iremote) *iremote = sf->remote;
502: return(0);
503: }
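/*
   A sketch of inspecting the graph; when ilocal is PETSC_NULL the leaves are stored
   contiguously, so leaf i lives at location i in leaf buffers.

     PetscInt          nroots,nleaves,i,loc;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;
     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       loc = ilocal ? ilocal[i] : i;   // location of leaf i in leafdata
       // iremote[i].rank and iremote[i].index identify the owning root
     }
*/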
507: /*@C
508: PetscSFView - view a star forest
510: Collective
512: Input Arguments:
513: + sf - star forest
514: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
516: Level: beginner
518: .seealso: PetscSFCreate(), PetscSFSetGraph()
519: @*/
520: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
521: {
523: PetscBool iascii;
527: if (!viewer) {PetscViewerASCIIGetStdout(((PetscObject)sf)->comm,&viewer);}
530: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
531: if (iascii) {
532: PetscMPIInt rank;
533: PetscInt i,j;
534: PetscBool verbose;
535: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer,"Star Forest Object");
536: PetscViewerASCIIPushTab(viewer);
537: PetscViewerASCIIPrintf(viewer,"synchronization=%s sort=%s\n",PetscSFSynchronizationTypes[sf->sync],sf->rankorder?"rank-order":"unordered");
538: MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
539: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
540: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
541: for (i=0; i<sf->nleaves; i++) {
542: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine?sf->mine[i]:i,sf->remote[i].rank,sf->remote[i].index);
543: }
544: PetscViewerFlush(viewer);
545: verbose = PETSC_FALSE;
546: PetscOptionsGetBool(((PetscObject)sf)->prefix,"-sf_view_verbose",&verbose,PETSC_NULL);
547: if (verbose) {
548: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
549: for (i=0; i<sf->nranks; i++) {
550: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
551: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
552: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
553: }
554: }
555: }
556: PetscViewerFlush(viewer);
557: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
558: PetscViewerASCIIPopTab(viewer);
559: }
560: return(0);
561: }
565: static PetscErrorCode MPIU_Type_unwrap(MPI_Datatype a,MPI_Datatype *atype)
566: {
567: PetscMPIInt nints,naddrs,ntypes,combiner;
571: MPI_Type_get_envelope(a,&nints,&naddrs,&ntypes,&combiner);
572: if (combiner == MPI_COMBINER_DUP) {
573: PetscMPIInt ints[1];
574: MPI_Aint addrs[1];
575: MPI_Datatype types[1];
576: if (nints != 0 || naddrs != 0 || ntypes != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Unexpected returns from MPI_Type_get_envelope()");
577: MPI_Type_get_contents(a,0,0,1,ints,addrs,types);
578: *atype = types[0];
579: } else {
580: *atype = a;
581: }
582: return(0);
583: }
587: static PetscErrorCode MPIU_Type_compare(MPI_Datatype a,MPI_Datatype b,PetscBool *match)
588: {
590: MPI_Datatype atype,btype;
591: PetscMPIInt aintcount,aaddrcount,atypecount,acombiner;
592: PetscMPIInt bintcount,baddrcount,btypecount,bcombiner;
595: MPIU_Type_unwrap(a,&atype);
596: MPIU_Type_unwrap(b,&btype);
597: *match = PETSC_FALSE;
598: if (atype == btype) {
599: *match = PETSC_TRUE;
600: return(0);
601: }
602: MPI_Type_get_envelope(atype,&aintcount,&aaddrcount,&atypecount,&acombiner);
603: MPI_Type_get_envelope(btype,&bintcount,&baddrcount,&btypecount,&bcombiner);
604: if (acombiner == bcombiner && aintcount == bintcount && aaddrcount == baddrcount && atypecount == btypecount && (aintcount > 0 || aaddrcount > 0 || atypecount > 0)) {
605: PetscMPIInt *aints,*bints;
606: MPI_Aint *aaddrs,*baddrs;
607: MPI_Datatype *atypes,*btypes;
608: PetscBool same;
609: PetscMalloc6(aintcount,PetscMPIInt,&aints,bintcount,PetscMPIInt,&bints,aaddrcount,MPI_Aint,&aaddrs,baddrcount,MPI_Aint,&baddrs,atypecount,MPI_Datatype,&atypes,btypecount,MPI_Datatype,&btypes);
610: MPI_Type_get_contents(atype,aintcount,aaddrcount,atypecount,aints,aaddrs,atypes);
611: MPI_Type_get_contents(btype,bintcount,baddrcount,btypecount,bints,baddrs,btypes);
612: PetscMemcmp(aints,bints,aintcount*sizeof(aints[0]),&same);
613: if (same) {
614: PetscMemcmp(aaddrs,baddrs,aaddrcount*sizeof(aaddrs[0]),&same);
615: if (same) {
616: /* This comparison should be recursive */
617: PetscMemcmp(atypes,btypes,atypecount*sizeof(atypes[0]),&same);
618: }
619: }
620: if (same) *match = PETSC_TRUE;
621: return(0);
622: }
623: return(0);
624: }
628: /*@C
629: PetscSFGetDataTypes - gets composite local and remote data types for each rank
631: Not Collective
633: Input Arguments:
634: + sf - star forest
635: - unit - data type for each node
637: Output Arguments:
638: + localtypes - types describing part of local leaf buffer referencing each remote rank
639: - remotetypes - types describing part of remote root buffer referenced for each remote rank
641: Level: developer
643: .seealso: PetscSFSetGraph(), PetscSFView()
644: @*/
645: PetscErrorCode PetscSFGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
646: {
648: PetscSFDataLink link;
649: PetscInt i,nranks;
650: const PetscInt *roffset;
651: const PetscMPIInt *ranks,*rmine,*rremote;
654: /* Look for types in cache */
655: for (link=sf->link; link; link=link->next) {
656: PetscBool match;
657: MPIU_Type_compare(unit,link->unit,&match);
658: if (match) {
659: *localtypes = link->mine;
660: *remotetypes = link->remote;
661: return(0);
662: }
663: }
665: /* Create new composite types for each send rank */
666: PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
667: PetscMalloc(sizeof(*link),&link);
668: MPI_Type_dup(unit,&link->unit);
669: PetscMalloc2(nranks,MPI_Datatype,&link->mine,nranks,MPI_Datatype,&link->remote);
670: for (i=0; i<nranks; i++) {
671: PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
672: MPI_Type_create_indexed_block(rcount,1,sf->rmine+sf->roffset[i],link->unit,&link->mine[i]);
673: MPI_Type_create_indexed_block(rcount,1,sf->rremote+sf->roffset[i],link->unit,&link->remote[i]);
674: MPI_Type_commit(&link->mine[i]);
675: MPI_Type_commit(&link->remote[i]);
676: }
677: link->next = sf->link;
678: sf->link = link;
680: *localtypes = link->mine;
681: *remotetypes = link->remote;
682: return(0);
683: }
687: /*@C
688: PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process
690: Not Collective
692: Input Arguments:
693: . sf - star forest
695: Output Arguments:
696: + nranks - number of ranks referenced by local part
697: . ranks - array of ranks
698: . roffset - offset in rmine/rremote for each rank (length nranks+1)
699: . rmine - concatenated array holding local indices referencing each remote rank
700: - rremote - concatenated array holding remote indices referenced for each remote rank
702: Level: developer
704: .seealso: PetscSFSetGraph(), PetscSFGetDataTypes()
705: @*/
706: PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscMPIInt **rmine,const PetscMPIInt **rremote)
707: {
711: if (nranks) *nranks = sf->nranks;
712: if (ranks) *ranks = sf->ranks;
713: if (roffset) *roffset = sf->roffset;
714: if (rmine) *rmine = sf->rmine;
715: if (rremote) *rremote = sf->rremote;
716: return(0);
717: }
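/*
   A sketch of walking the per-rank connectivity, mirroring the verbose output of
   PetscSFView() above: the edges to rank ranks[i] occupy the index range
   [roffset[i],roffset[i+1]) of rmine and rremote.

     PetscInt          nranks,i,j;
     const PetscMPIInt *ranks,*rmine,*rremote;
     const PetscInt    *roffset;
     PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
     for (i=0; i<nranks; i++) {
       for (j=roffset[i]; j<roffset[i+1]; j++) {
         // local leaf location rmine[j] references root rremote[j] on rank ranks[i]
       }
     }
*/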
721: /*@C
722: PetscSFGetWindow - Get a window for use with a given data type
724: Collective on PetscSF
726: Input Arguments:
727: + sf - star forest
728: . unit - data type
729: . array - array to be sent
730: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
731: . fenceassert - assert parameter for call to MPI_Win_fence(), if PETSCSF_SYNCHRONIZATION_FENCE
732: . postassert - assert parameter for call to MPI_Win_post(), if PETSCSF_SYNCHRONIZATION_ACTIVE
733: - startassert - assert parameter for call to MPI_Win_start(), if PETSCSF_SYNCHRONIZATION_ACTIVE
735: Output Arguments:
736: . win - window
738: Level: developer
740: Developer Notes:
741: This currently always creates a new window. This is more synchronous than necessary. An alternative is to try to
742: reuse an existing window created with the same array. Another alternative is to maintain a cache of windows and reuse
743: whichever one is available, by copying the array into it if necessary.
745: .seealso: PetscSFGetRanks(), PetscSFGetDataTypes()
746: @*/
747: PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,MPI_Win *win)
748: {
750: MPI_Aint lb,lb_true,bytes,bytes_true;
751: PetscSFWinLink link;
754: MPI_Type_get_extent(unit,&lb,&bytes);
755: MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
756: if (lb != 0 || lb_true != 0) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
757: if (bytes != bytes_true) SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
758: PetscMalloc(sizeof(*link),&link);
759: link->bytes = bytes;
760: link->addr = array;
761: MPI_Win_create(array,(MPI_Aint)bytes*sf->nroots,(PetscMPIInt)bytes,MPI_INFO_NULL,((PetscObject)sf)->comm,&link->win);
762: link->epoch = epoch;
763: link->next = sf->wins;
764: link->inuse = PETSC_TRUE;
765: sf->wins = link;
766: *win = link->win;
768: if (epoch) {
769: switch (sf->sync) {
770: case PETSCSF_SYNCHRONIZATION_FENCE:
771: MPI_Win_fence(fenceassert,*win);
772: break;
773: case PETSCSF_SYNCHRONIZATION_LOCK: /* Handled outside */
774: break;
775: case PETSCSF_SYNCHRONIZATION_ACTIVE: {
776: MPI_Group ingroup,outgroup;
777: PetscSFGetGroups(sf,&ingroup,&outgroup);
778: MPI_Win_post(ingroup,postassert,*win);
779: MPI_Win_start(outgroup,startassert,*win);
780: } break;
781: default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_PLIB,"Unknown synchronization type");
782: }
783: }
784: return(0);
785: }
789: /*@C
790: PetscSFFindWindow - Finds a window that is already in use
792: Not Collective
794: Input Arguments:
795: + sf - star forest
796: . unit - data type
797: - array - array with which the window is associated
799: Output Arguments:
800: . win - window
802: Level: developer
804: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
805: @*/
806: PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win)
807: {
808: PetscSFWinLink link;
811: for (link=sf->wins; link; link=link->next) {
812: if (array == link->addr) {
813: *win = link->win;
814: return(0);
815: }
816: }
817: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
818: return(0);
819: }
823: /*@C
824: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
826: Collective
828: Input Arguments:
829: + sf - star forest
830: . unit - data type
831: . array - array associated with window
832: . epoch - close an epoch, must match argument to PetscSFGetWindow()
833: - win - window
835: Level: developer
837: .seealso: PetscSFFindWindow()
838: @*/
839: PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
840: {
842: PetscSFWinLink *p,link;
845: for (p=&sf->wins; *p; p=&(*p)->next) {
846: link = *p;
847: if (*win == link->win) {
848: if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
849: if (epoch != link->epoch) {
850: if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
851: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
852: }
853: *p = link->next;
854: goto found;
855: }
856: }
857: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
859: found:
860: if (epoch) {
861: switch (sf->sync) {
862: case PETSCSF_SYNCHRONIZATION_FENCE:
863: MPI_Win_fence(fenceassert,*win);
864: break;
865: case PETSCSF_SYNCHRONIZATION_LOCK:
866: break; /* handled outside */
867: case PETSCSF_SYNCHRONIZATION_ACTIVE: {
868: MPI_Win_complete(*win);
869: MPI_Win_wait(*win);
870: } break;
871: default: SETERRQ(((PetscObject)sf)->comm,PETSC_ERR_PLIB,"Unknown synchronization type");
872: }
873: }
875: MPI_Win_free(&link->win);
876: PetscFree(link);
877: *win = MPI_WIN_NULL;
878: return(0);
879: }
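/*
   A sketch of how the window routines pair up, following the pattern used by
   PetscSFBcastBegin()/PetscSFBcastEnd() below: the begin phase acquires the window
   and opens an epoch, the end phase looks the window up by the same array address
   and releases it.

     MPI_Win win;
     PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
     // ... one-sided MPI_Get()/MPI_Accumulate() calls against win ...
     PetscSFFindWindow(sf,unit,rootdata,&win);
     PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
*/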
883: /*@C
884: PetscSFGetGroups - gets incoming and outgoing process groups
886: Collective
888: Input Argument:
889: . sf - star forest
891: Output Arguments:
892: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
893: - outgoing - group of destination processes for outgoing edges (roots that I reference)
895: Level: developer
897: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
898: @*/
899: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
900: {
902: MPI_Group group;
905: if (sf->ingroup == MPI_GROUP_NULL) {
906: PetscInt i;
907: const PetscInt *indegree;
908: PetscMPIInt rank,*outranks,*inranks;
909: PetscSFNode *remote;
910: PetscSF bgcount;
912: /* Compute the number of incoming ranks */
913: PetscMalloc(sf->nranks*sizeof(PetscSFNode),&remote);
914: for (i=0; i<sf->nranks; i++) {
915: remote[i].rank = sf->ranks[i];
916: remote[i].index = 0;
917: }
918: PetscSFCreate(((PetscObject)sf)->comm,&bgcount);
919: PetscSFSetSynchronizationType(bgcount,PETSCSF_SYNCHRONIZATION_LOCK); /* or FENCE, ACTIVE here would cause recursion */
920: PetscSFSetGraph(bgcount,1,sf->nranks,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
921: PetscSFComputeDegreeBegin(bgcount,&indegree);
922: PetscSFComputeDegreeEnd(bgcount,&indegree);
924: /* Enumerate the incoming ranks */
925: PetscMalloc2(indegree[0],PetscMPIInt,&inranks,sf->nranks,PetscMPIInt,&outranks);
926: MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
927: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
928: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
929: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
930: MPI_Comm_group(((PetscObject)sf)->comm,&group);
931: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
932: MPI_Group_free(&group);
933: PetscFree2(inranks,outranks);
934: PetscSFDestroy(&bgcount);
935: }
936: *incoming = sf->ingroup;
938: if (sf->outgroup == MPI_GROUP_NULL) {
939: MPI_Comm_group(((PetscObject)sf)->comm,&group);
940: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
941: MPI_Group_free(&group);
942: }
943: *outgoing = sf->outgroup;
944: return(0);
945: }
949: /*@C
950:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
952: Collective
954: Input Argument:
955: .  sf - star forest that may contain roots with degree 0 or greater than 1
957: Output Arguments:
958: . multi - star forest with split roots, such that each root has degree exactly 1
960: Level: developer
962: Notes:
964: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
965: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
966: edge, it is a candidate for future optimization that might involve its removal.
968: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
969: @*/
970: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
971: {
977: if (sf->nroots < 0) {
978: PetscSFCreate(((PetscObject)sf)->comm,&sf->multi);
979: *multi = sf->multi;
980: return(0);
981: }
982: if (!sf->multi) {
983: const PetscInt *indegree;
984: PetscInt i,*inoffset,*outones,*outoffset;
985: PetscSFNode *remote;
986: PetscSFComputeDegreeBegin(sf,&indegree);
987: PetscSFComputeDegreeEnd(sf,&indegree);
988: PetscMalloc3(sf->nroots+1,PetscInt,&inoffset,sf->nleaves,PetscInt,&outones,sf->nleaves,PetscInt,&outoffset);
989: inoffset[0] = 0;
990: #if 1
991: for (i=0; i<sf->nroots; i++) inoffset[i+1] = PetscMax(i+1, inoffset[i] + indegree[i]);
992: #else
993: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
994: #endif
995: for (i=0; i<sf->nleaves; i++) outones[i] = 1;
996: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
997: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPIU_SUM);
998: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
999: #if 0
1000: #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
1001: for (i=0; i<sf->nroots; i++) {
1002: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1003: }
1004: #endif
1005: #endif
1006: PetscMalloc(sf->nleaves*sizeof(*remote),&remote);
1007: for (i=0; i<sf->nleaves; i++) {
1008: remote[i].rank = sf->remote[i].rank;
1009: remote[i].index = outoffset[i];
1010: }
1011: PetscSFCreate(((PetscObject)sf)->comm,&sf->multi);
1012: PetscSFSetSynchronizationType(sf->multi,sf->sync);
1013: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,PETSC_NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1014: if (sf->rankorder) { /* Sort the ranks */
1015: PetscMPIInt rank;
1016: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1017: PetscSFNode *newremote;
1018: MPI_Comm_rank(((PetscObject)sf)->comm,&rank);
1019: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1020: PetscMalloc5(sf->multi->nroots,PetscInt,&inranks,sf->multi->nroots,PetscInt,&newoffset,sf->nleaves,PetscInt,&outranks,sf->nleaves,PetscInt,&newoutoffset,maxdegree,PetscInt,&tmpoffset);
1021: for (i=0; i<sf->nleaves; i++) outranks[i] = rank;
1022: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1023: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1024: /* Sort the incoming ranks at each vertex, build the inverse map */
1025: for (i=0; i<sf->nroots; i++) {
1026: PetscInt j;
1027: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1028: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1029: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1030: }
1031: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1032: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1033: PetscMalloc(sf->nleaves*sizeof(*newremote),&newremote);
1034: for (i=0; i<sf->nleaves; i++) {
1035: newremote[i].rank = sf->remote[i].rank;
1036: newremote[i].index = newoutoffset[i];
1037: }
1038: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,PETSC_NULL,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1039: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1040: }
1041: PetscFree3(inoffset,outones,outoffset);
1042: }
1043: *multi = sf->multi;
1044: return(0);
1045: }
1049: /*@C
1050: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1052: Collective
1054: Input Arguments:
1055: + sf - original star forest
1056: . nroots - number of roots to select on this process
1057: - selected - selected roots on this process
1059: Output Arguments:
1060: . newsf - new star forest
1062: Level: advanced
1064: Note:
1065: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1066: be done by calling PetscSFGetGraph().
1068: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1069: @*/
1070: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
1071: {
1073: PetscInt i,nleaves,*ilocal,*rootdata,*leafdata;
1074: PetscSFNode *iremote;
1080: PetscMalloc2(sf->nroots,PetscInt,&rootdata,sf->nleaves,PetscInt,&leafdata);
1081: PetscMemzero(rootdata,sf->nroots*sizeof(PetscInt));
1082: PetscMemzero(leafdata,sf->nleaves*sizeof(PetscInt));
1083: for (i=0; i<nroots; i++) rootdata[selected[i]] = 1;
1084: PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
1085: PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
1087: for (i=0,nleaves=0; i<sf->nleaves; i++) nleaves += leafdata[i];
1088: PetscMalloc(nleaves*sizeof(PetscInt),&ilocal);
1089: PetscMalloc(nleaves*sizeof(PetscSFNode),&iremote);
1090: for (i=0,nleaves=0; i<sf->nleaves; i++) {
1091: if (leafdata[i]) {
1092: ilocal[nleaves] = sf->mine ? sf->mine[i] : i;
1093: iremote[nleaves].rank = sf->remote[i].rank;
1094: iremote[nleaves].index = sf->remote[i].index;
1095: nleaves++;
1096: }
1097: }
1098: PetscSFCreate(((PetscObject)sf)->comm,newsf);
1099: PetscSFSetSynchronizationType(*newsf,sf->sync);
1100: PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
1101: PetscFree2(rootdata,leafdata);
1102: return(0);
1103: }
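/*
   A sketch of restricting communication to a subset of roots (hypothetical
   selection): only edges whose root appears in selected[] survive in the new star
   forest, and leaf indices are left unchanged.

     PetscInt selected[] = {0,2};   // keep roots 0 and 2 on this process
     PetscSF  esf;
     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     // use esf for communication touching only the selected roots
     PetscSFDestroy(&esf);
*/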
1107: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
1108: static PetscErrorCode PetscSFOpTranslate(MPI_Op *op)
1109: {
1112: if (*op == MPIU_SUM) *op = MPI_SUM;
1113: else if (*op == MPIU_MAX) *op = MPI_MAX;
1114: else if (*op == MPIU_MIN) *op = MPI_MIN;
1115: return(0);
1116: }
1120: /*@C
1121: PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()
1123: Collective on PetscSF
1125: Input Arguments:
1126: + sf - star forest on which to communicate
1127: . unit - data type associated with each node
1128: - rootdata - buffer to broadcast
1130: Output Arguments:
1131: . leafdata - buffer to update with values from each leaf's respective root
1133: Level: intermediate
1135: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
1136: @*/
1137: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1138: {
1139: PetscErrorCode ierr;
1140: PetscInt i,nranks;
1141: const PetscMPIInt *ranks;
1142: const MPI_Datatype *mine,*remote;
1143: MPI_Win win;
1147: PetscSFCheckGraphSet(sf,1);
1148: PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1149: PetscSFGetDataTypes(sf,unit,&mine,&remote);
1150: PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);
1151: for (i=0; i<nranks; i++) {
1152: if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
1153: MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);
1154: if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_unlock(ranks[i],win);}
1155: }
1156: return(0);
1157: }
1161: /*@C
1162: PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()
1164: Collective
1166: Input Arguments:
1167: + sf - star forest
1168: . unit - data type
1169: - rootdata - buffer to broadcast
1171: Output Arguments:
1172: . leafdata - buffer to update with values from each leaf's respective root
1174: Level: intermediate
1176: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1177: @*/
1178: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1179: {
1181: MPI_Win win;
1185: PetscSFCheckGraphSet(sf,1);
1186: PetscSFFindWindow(sf,unit,rootdata,&win);
1187: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,&win);
1188: return(0);
1189: }
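/*
   A sketch of a complete broadcast (hypothetical buffers sized by the caller):
   every leaf receives the current value of its root.

     PetscInt *rootdata;  // length nroots, values owned by this process
     PetscInt *leafdata;  // leaf-space buffer, filled by the broadcast
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
     // ... unrelated local work can overlap the communication here ...
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
*/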
1193: /*@C
1194: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1196: Collective
1198: Input Arguments:
1199: + sf - star forest
1200: . unit - data type
1201: . leafdata - values to reduce
1202: - op - reduction operation
1204: Output Arguments:
1205: . rootdata - result of reduction of values from all leaves of each root
1207: Level: intermediate
1209: .seealso: PetscSFBcastBegin()
1210: @*/
1211: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1212: {
1213: PetscErrorCode ierr;
1214: PetscInt i,nranks;
1215: const PetscMPIInt *ranks;
1216: const MPI_Datatype *mine,*remote;
1217: MPI_Win win;
1221: PetscSFCheckGraphSet(sf,1);
1222: PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1223: PetscSFGetDataTypes(sf,unit,&mine,&remote);
1224: PetscSFOpTranslate(&op);
1225: PetscSFGetWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&win);
1226: for (i=0; i<nranks; i++) {
1227: if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
1228: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
1229: if (sf->sync == PETSCSF_SYNCHRONIZATION_LOCK) {MPI_Win_unlock(ranks[i],win);}
1230: }
1231: return(0);
1232: }
1236: /*@C
1237: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1239: Collective
1241: Input Arguments:
1242: + sf - star forest
1243: . unit - data type
1244: . leafdata - values to reduce
1245: - op - reduction operation
1247: Output Arguments:
1248: . rootdata - result of reduction of values from all leaves of each root
1250: Level: intermediate
1252: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1253: @*/
1254: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1255: {
1257: MPI_Win win;
1261: PetscSFCheckGraphSet(sf,1);
1262: if (!sf->wins) {return(0);}
1263: PetscSFFindWindow(sf,unit,rootdata,&win);
1264: MPI_Win_fence(MPI_MODE_NOSUCCEED,win);
1265: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_TRUE,MPI_MODE_NOSUCCEED,&win);
1266: return(0);
1267: }
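/*
   A sketch of a complete reduction (hypothetical buffers rootdata and leafdata):
   the contribution from every leaf is summed into its root. MPI_REPLACE can be
   used instead when each root has at most one leaf, as PetscSFCreateInverseSF()
   above does.

     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);
*/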
1271: /*@C
1272: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1274: Collective
1276: Input Arguments:
1277: . sf - star forest
1279: Output Arguments:
1280: . degree - degree of each root vertex
1282: Level: advanced
1284: .seealso: PetscSFGatherBegin()
1285: @*/
1286: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1287: {
1292: PetscSFCheckGraphSet(sf,1);
1294: if (!sf->degree) {
1295: PetscInt i;
1296: PetscMalloc(sf->nroots*sizeof(PetscInt),&sf->degree);
1297: PetscMalloc(sf->nleaves*sizeof(PetscInt),&sf->degreetmp);
1298: for (i=0; i<sf->nroots; i++) sf->degree[i] = 0;
1299: for (i=0; i<sf->nleaves; i++) sf->degreetmp[i] = 1;
1300: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1301: }
1302: *degree = PETSC_NULL;
1303: return(0);
1304: }
1308: /*@C
1309: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1311: Collective
1313: Input Arguments:
1314: . sf - star forest
1316: Output Arguments:
1317: . degree - degree of each root vertex
1319: Level: developer
1321: .seealso: PetscSFComputeDegreeBegin()
1322: @*/
1323: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1324: {
1329: PetscSFCheckGraphSet(sf,1);
1330: if (!sf->degreeknown) {
1331: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPIU_SUM);
1332: PetscFree(sf->degreetmp);
1333: sf->degreeknown = PETSC_TRUE;
1334: }
1335: *degree = sf->degree;
1336: return(0);
1337: }
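/*
   A sketch of querying root degrees; the returned array is cached and owned by the
   PetscSF, so the caller must not free it.

     const PetscInt *degree;
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     // degree[i] is the number of leaves referencing root i on this process
*/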
1341: /*@C
1342: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1344: Collective
1346: Input Arguments:
1347: + sf - star forest
1348: . unit - data type
1349: . leafdata - leaf values to use in reduction
1350: - op - operation to use for reduction
1352: Output Arguments:
1353: + rootdata - root values to be updated, input state is seen by first process to perform an update
1354: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1356: Level: advanced
1358: Note:
1359: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1360: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1361:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1362: integers.
1364: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1365: @*/
1366: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1367: {
1369: PetscInt i,nranks;
1370: const PetscMPIInt *ranks;
1371: const MPI_Datatype *mine,*remote;
1372: MPI_Win win;
1376: PetscSFCheckGraphSet(sf,1);
1377: PetscSFGetRanks(sf,&nranks,&ranks,PETSC_NULL,PETSC_NULL,PETSC_NULL);
1378: PetscSFGetDataTypes(sf,unit,&mine,&remote);
1379: PetscSFOpTranslate(&op);
1380: PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);
1381: for (i=0; i<sf->nranks; i++) {
1382: MPI_Win_lock(MPI_LOCK_EXCLUSIVE,sf->ranks[i],0,win);
1383: MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);
1384: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);
1385: MPI_Win_unlock(ranks[i],win);
1386: }
1387: return(0);
1388: }
1392: /*@C
1393: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1395: Collective
1397: Input Arguments:
1398: + sf - star forest
1399: . unit - data type
1400: . leafdata - leaf values to use in reduction
1401: - op - operation to use for reduction
1403: Output Arguments:
1404: + rootdata - root values to be updated, input state is seen by first process to perform an update
1405: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1407: Level: advanced
1409: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1410: @*/
1411: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1412: {
1414: MPI_Win win;
1418: PetscSFCheckGraphSet(sf,1);
1419: PetscSFFindWindow(sf,unit,rootdata,&win);
1420: /* Nothing to do currently because MPI_LOCK_EXCLUSIVE is used in PetscSFFetchAndOpBegin(), rendering this implementation synchronous. */
1421: PetscSFRestoreWindow(sf,unit,rootdata,PETSC_FALSE,0,&win);
1422: return(0);
1423: }
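/*
   A sketch of fetch-and-op used to hand out unique slots, following the pattern in
   PetscSFGetMultiSF() above: each leaf atomically adds 1 to a counter at its root
   and receives the counter's previous value, so leaves sharing a root obtain
   distinct offsets (hypothetical buffer names).

     PetscInt *rootcount;   // per-root counters, initialized by the caller
     PetscInt *leafone;     // all entries set to 1
     PetscInt *leafoffset;  // receives the pre-update counter value
     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,leafone,leafoffset,MPIU_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,leafone,leafoffset,MPIU_SUM);
*/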
1427: /*@C
1428: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1430: Collective
1432: Input Arguments:
1433: + sf - star forest
1434: . unit - data type
1435: - leafdata - leaf data to gather to roots
1437: Output Argument:
1438: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1440: Level: intermediate
1442: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1443: @*/
1444: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1445: {
1447: PetscSF multi;
1450: PetscSFGetMultiSF(sf,&multi);
1451: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1452: return(0);
1453: }
1457: /*@C
1458: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1460: Collective
1462: Input Arguments:
1463: + sf - star forest
1464: . unit - data type
1465: - leafdata - leaf data to gather to roots
1467: Output Argument:
1468: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1470: Level: intermediate
1472: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1473: @*/
1474: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1475: {
1477: PetscSF multi;
1481: PetscSFCheckGraphSet(sf,1);
1482: PetscSFGetMultiSF(sf,&multi);
1483: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1484: return(0);
1485: }
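/*
   A sketch of a gather (hypothetical buffers; nroots and leafdata assumed from the
   surrounding code): the space per root in multirootdata equals that root's
   degree, so the buffer is sized by summing the degrees obtained from
   PetscSFComputeDegreeBegin()/End().

     const PetscInt *degree;
     PetscInt       i,ntotal = 0,*multirootdata;
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) ntotal += degree[i];
     PetscMalloc(ntotal*sizeof(PetscInt),&multirootdata);
     PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);
     PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);
*/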
1489: /*@C
1490: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1492: Collective
1494: Input Arguments:
1495: + sf - star forest
1496: . unit - data type
1497: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1499: Output Argument:
1500: .  leafdata - leaf data to be updated with personal data from each respective root
1502: Level: intermediate
1504: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1505: @*/
1506: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1507: {
1509: PetscSF multi;
1513: PetscSFCheckGraphSet(sf,1);
1514: PetscSFGetMultiSF(sf,&multi);
1515: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1516: return(0);
1517: }
1521: /*@C
1522: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1524: Collective
1526: Input Arguments:
1527: + sf - star forest
1528: . unit - data type
1529: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1531: Output Argument:
1532: .  leafdata - leaf data to be updated with personal data from each respective root
1534: Level: intermediate
1536: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1537: @*/
1538: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1539: {
1541: PetscSF multi;
1545: PetscSFCheckGraphSet(sf,1);
1546: PetscSFGetMultiSF(sf,&multi);
1547: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1548: return(0);
1549: }
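/*
   A sketch of a scatter (hypothetical buffers), the converse of the gather sketch
   above: each leaf receives its own personal unit from the multi-root buffer,
   which is laid out root by root in the same degree-sized slots used by the
   gather.

     PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);
     PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);
*/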