Actual source code: sf.c
petsc-3.12.5 2020-03-29
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_USE_DEBUG)
6: # define PetscSFCheckGraphSet(sf,arg) do { \
7: if (PetscUnlikely(!(sf)->graphset)) \
8: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
9: } while (0)
10: #else
11: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
12: #endif
14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
16: /*@
17: PetscSFCreate - create a star forest communication context
19: Collective
21: Input Arguments:
22: . comm - communicator on which the star forest will operate
24: Output Arguments:
25: . sf - new star forest context
27: Options Database Keys:
28: + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default)
29: . -sf_type window -Use MPI-3 one-sided window for communication
30: - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication
32: Level: intermediate
34: Notes:
35: When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
36: MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37: SFs are optimized and have better performance than general SFs.
39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40: @*/
41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42: {
44: PetscSF b;
48: PetscSFInitializePackage();
50: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
52: b->nroots = -1;
53: b->nleaves = -1;
54: b->minleaf = PETSC_MAX_INT;
55: b->maxleaf = PETSC_MIN_INT;
56: b->nranks = -1;
57: b->rankorder = PETSC_TRUE;
58: b->ingroup = MPI_GROUP_NULL;
59: b->outgroup = MPI_GROUP_NULL;
60: b->graphset = PETSC_FALSE;
62: *sf = b;
63: return(0);
64: }
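/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   a PetscSF is created on a communicator, configured, given a graph, and destroyed when done.
   The graph-building calls themselves are sketched after PetscSFSetGraph() and
   PetscSFSetGraphWithPattern() below.

     PetscSF sf;

     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetFromOptions(sf);
     ... set the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ...
     PetscSFSetUp(sf);
     ... communicate with PetscSFBcastBegin/End(), PetscSFReduceBegin/End(), etc. ...
     PetscSFDestroy(&sf);
*/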
66: /*@
67: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
69: Collective
71: Input Arguments:
72: . sf - star forest
74: Level: advanced
76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77: @*/
78: PetscErrorCode PetscSFReset(PetscSF sf)
79: {
84: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
85: sf->nroots = -1;
86: sf->nleaves = -1;
87: sf->minleaf = PETSC_MAX_INT;
88: sf->maxleaf = PETSC_MIN_INT;
89: sf->mine = NULL;
90: sf->remote = NULL;
91: sf->graphset = PETSC_FALSE;
92: PetscFree(sf->mine_alloc);
93: PetscFree(sf->remote_alloc);
94: sf->nranks = -1;
95: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
96: #if defined(PETSC_HAVE_CUDA)
97: if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
98: #endif
99: sf->degreeknown = PETSC_FALSE;
100: PetscFree(sf->degree);
101: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
102: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
103: PetscSFDestroy(&sf->multi);
104: PetscLayoutDestroy(&sf->map);
105: sf->setupcalled = PETSC_FALSE;
106: return(0);
107: }
109: /*@C
110: PetscSFSetType - Set the PetscSF communication implementation
112: Collective on PetscSF
114: Input Parameters:
115: + sf - the PetscSF context
116: - type - a known method
118: Options Database Key:
119: . -sf_type <type> - Sets the method; use -help for a list
120: of available methods (for instance, window, pt2pt, neighbor)
122: Notes:
123: See "include/petscsf.h" for available methods (for instance)
124: + PETSCSFWINDOW - MPI-2/3 one-sided
125: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
127: Level: intermediate
129: .seealso: PetscSFType, PetscSFCreate()
130: @*/
131: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
132: {
133: PetscErrorCode ierr,(*r)(PetscSF);
134: PetscBool match;
140: PetscObjectTypeCompare((PetscObject)sf,type,&match);
141: if (match) return(0);
143: PetscFunctionListFind(PetscSFList,type,&r);
144: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
145: /* Destroy the previous PetscSF implementation context */
146: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
147: PetscMemzero(sf->ops,sizeof(*sf->ops));
148: PetscObjectChangeTypeName((PetscObject)sf,type);
149: (*r)(sf);
150: return(0);
151: }
153: /*@C
154: PetscSFGetType - Get the PetscSF communication implementation
156: Not Collective
158: Input Parameter:
159: . sf - the PetscSF context
161: Output Parameter:
162: . type - the PetscSF type name
164: Level: intermediate
166: .seealso: PetscSFSetType(), PetscSFCreate()
167: @*/
168: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
169: {
173: *type = ((PetscObject)sf)->type_name;
174: return(0);
175: }
177: /*@
178: PetscSFDestroy - destroy star forest
180: Collective
182: Input Arguments:
183: . sf - address of star forest
185: Level: intermediate
187: .seealso: PetscSFCreate(), PetscSFReset()
188: @*/
189: PetscErrorCode PetscSFDestroy(PetscSF *sf)
190: {
194: if (!*sf) return(0);
196: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
197: PetscSFReset(*sf);
198: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
199: PetscHeaderDestroy(sf);
200: return(0);
201: }
203: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
204: {
205: #if defined(PETSC_USE_DEBUG)
206: PetscInt i, nleaves;
207: PetscMPIInt size;
208: const PetscInt *ilocal;
209: const PetscSFNode *iremote;
210: PetscErrorCode ierr;
213: if (!sf->graphset) return(0);
214: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
215: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
216: for (i = 0; i < nleaves; i++) {
217: const PetscInt rank = iremote[i].rank;
218: const PetscInt remote = iremote[i].index;
219: const PetscInt leaf = ilocal ? ilocal[i] : i;
220: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
221: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
222: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
223: }
224: return(0);
225: #else
227: return(0);
228: #endif
229: }
231: /*@
232: PetscSFSetUp - set up communication structures
234: Collective
236: Input Arguments:
237: . sf - star forest communication object
239: Level: beginner
241: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
242: @*/
243: PetscErrorCode PetscSFSetUp(PetscSF sf)
244: {
249: PetscSFCheckGraphSet(sf,1);
250: if (sf->setupcalled) return(0);
251: PetscSFCheckGraphValid_Private(sf);
252: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
253: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
254: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
255: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
256: sf->setupcalled = PETSC_TRUE;
257: return(0);
258: }
260: /*@
261: PetscSFSetFromOptions - set PetscSF options using the options database
263: Logically Collective
265: Input Arguments:
266: . sf - star forest
268: Options Database Keys:
269: + -sf_type - implementation type, see PetscSFSetType()
270: - -sf_rank_order - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
272: Level: intermediate
274: .seealso: PetscSFWindowSetSyncType()
275: @*/
276: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
277: {
278: PetscSFType deft;
279: char type[256];
281: PetscBool flg;
285: PetscObjectOptionsBegin((PetscObject)sf);
286: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
287: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
288: PetscSFSetType(sf,flg ? type : deft);
289: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
290: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
291: PetscOptionsEnd();
292: return(0);
293: }
295: /*@
296: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
298: Logically Collective
300: Input Arguments:
301: + sf - star forest
302: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
304: Level: advanced
306: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
307: @*/
308: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
309: {
314: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
315: sf->rankorder = flg;
316: return(0);
317: }
319: /*@
320: PetscSFSetGraph - Set a parallel star forest
322: Collective
324: Input Arguments:
325: + sf - star forest
326: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
327: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
328: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
329: during setup in debug mode)
330: . localmode - copy mode for ilocal
331: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
332: during setup in debug mode)
333: - remotemode - copy mode for iremote
335: Level: intermediate
337: Notes:
338: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
340: Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
341: encode contiguous storage. In such a case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
342: needed.
344: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
345: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
347: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
348: @*/
349: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
350: {
357: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
358: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
360: PetscSFReset(sf);
361: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
363: sf->nroots = nroots;
364: sf->nleaves = nleaves;
366: if (nleaves && ilocal) {
367: PetscInt i;
368: PetscInt minleaf = PETSC_MAX_INT;
369: PetscInt maxleaf = PETSC_MIN_INT;
370: int contiguous = 1;
371: for (i=0; i<nleaves; i++) {
372: minleaf = PetscMin(minleaf,ilocal[i]);
373: maxleaf = PetscMax(maxleaf,ilocal[i]);
374: contiguous &= (ilocal[i] == i);
375: }
376: sf->minleaf = minleaf;
377: sf->maxleaf = maxleaf;
378: if (contiguous) {
379: if (localmode == PETSC_OWN_POINTER) {
380: PetscFree(ilocal);
381: }
382: ilocal = NULL;
383: }
384: } else {
385: sf->minleaf = 0;
386: sf->maxleaf = nleaves - 1;
387: }
389: if (ilocal) {
390: switch (localmode) {
391: case PETSC_COPY_VALUES:
392: PetscMalloc1(nleaves,&sf->mine_alloc);
393: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
394: sf->mine = sf->mine_alloc;
395: break;
396: case PETSC_OWN_POINTER:
397: sf->mine_alloc = (PetscInt*)ilocal;
398: sf->mine = sf->mine_alloc;
399: break;
400: case PETSC_USE_POINTER:
401: sf->mine_alloc = NULL;
402: sf->mine = (PetscInt*)ilocal;
403: break;
404: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
405: }
406: }
408: switch (remotemode) {
409: case PETSC_COPY_VALUES:
410: PetscMalloc1(nleaves,&sf->remote_alloc);
411: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
412: sf->remote = sf->remote_alloc;
413: break;
414: case PETSC_OWN_POINTER:
415: sf->remote_alloc = (PetscSFNode*)iremote;
416: sf->remote = sf->remote_alloc;
417: break;
418: case PETSC_USE_POINTER:
419: sf->remote_alloc = NULL;
420: sf->remote = (PetscSFNode*)iremote;
421: break;
422: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
423: }
425: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
426: sf->graphset = PETSC_TRUE;
427: return(0);
428: }
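/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   build a forest in which every rank owns two roots and has two contiguous leaves (ilocal is NULL),
   leaf 0 referencing root 0 of rank 0 and leaf 1 referencing root 1 of the next rank (wrapping
   around), then broadcast root data to the leaves.

     PetscSF     sf;
     PetscSFNode remote[2];
     PetscInt    rootdata[2],leafdata[2];
     PetscMPIInt rank,size;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     remote[0].rank  = 0;
     remote[0].index = 0;
     remote[1].rank  = (rank+1)%size;
     remote[1].index = 1;
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraph(sf,2,2,NULL,PETSC_COPY_VALUES,remote,PETSC_COPY_VALUES);
     rootdata[0] = 100+rank; rootdata[1] = 200+rank;
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
     PetscSFDestroy(&sf);
*/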
430: /*@
431: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
433: Collective
435: Input Parameters:
436: + sf - The PetscSF
437: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
438: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
440: Notes:
441: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map;
442: n and N are the local and global sizes of x, respectively.
444: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
445: sequential vector y on every rank.
447: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
448: sequential vector y on rank 0.
450: In the above cases, entries of x are roots and entries of y are leaves.
452: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
453: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
454: of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
455: not mean one cannot send multiple items: one just needs to create an MPI datatype for the multiple data
456: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
458: In this case, roots and leaves are symmetric.
460: Level: intermediate
461: @*/
462: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
463: {
464: MPI_Comm comm;
465: PetscInt n,N,res[2];
466: PetscMPIInt rank,size;
467: PetscSFType type;
471: PetscObjectGetComm((PetscObject)sf, &comm);
472: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
473: MPI_Comm_rank(comm,&rank);
474: MPI_Comm_size(comm,&size);
476: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
477: type = PETSCSFALLTOALL;
478: PetscLayoutCreate(comm,&sf->map);
479: PetscLayoutSetLocalSize(sf->map,size);
480: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
481: PetscLayoutSetUp(sf->map);
482: } else {
483: PetscLayoutGetLocalSize(map,&n);
484: PetscLayoutGetSize(map,&N);
485: res[0] = n;
486: res[1] = -n;
487: /* Check if n is the same over all ranks so that we can optimize it */
488: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
489: if (res[0] == -res[1]) { /* same n */
490: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
491: } else {
492: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
493: }
494: PetscLayoutReference(map,&sf->map);
495: }
496: PetscSFSetType(sf,type);
498: sf->pattern = pattern;
499: sf->mine = NULL; /* Contiguous */
501: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
502: Also set other easy stuff.
503: */
504: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
505: sf->nleaves = N;
506: sf->nroots = n;
507: sf->nranks = size;
508: sf->minleaf = 0;
509: sf->maxleaf = N - 1;
510: } else if (pattern == PETSCSF_PATTERN_GATHER) {
511: sf->nleaves = rank ? 0 : N;
512: sf->nroots = n;
513: sf->nranks = rank ? 0 : size;
514: sf->minleaf = 0;
515: sf->maxleaf = rank ? -1 : N - 1;
516: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
517: sf->nleaves = size;
518: sf->nroots = size;
519: sf->nranks = size;
520: sf->minleaf = 0;
521: sf->maxleaf = size - 1;
522: }
523: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
524: sf->graphset = PETSC_TRUE;
525: return(0);
526: }
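/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   replicate a distributed array on every rank with the ALLGATHER pattern. Each rank owns n roots
   described by a PetscLayout; after the Bcast every rank holds all N values as leaves.

     PetscSF     sf;
     PetscLayout map;
     PetscInt    i,n = 2,N;
     PetscInt    rootdata[2],*leafdata;
     PetscMPIInt rank;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     PetscLayoutCreate(PETSC_COMM_WORLD,&map);
     PetscLayoutSetLocalSize(map,n);
     PetscLayoutSetUp(map);
     PetscLayoutGetSize(map,&N);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
     PetscMalloc1(N,&leafdata);
     for (i=0; i<n; i++) rootdata[i] = rank*n + i;
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
     PetscFree(leafdata);
     PetscSFDestroy(&sf);
     PetscLayoutDestroy(&map);
*/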
528: /*@
529: PetscSFCreateInverseSF - given a PetscSF in which all roots have degree 1, creates the inverse map
531: Collective
533: Input Arguments:
535: . sf - star forest to invert
537: Output Arguments:
538: . isf - inverse of sf
539: Level: advanced
541: Notes:
542: All roots must have degree 1.
544: The local space may be a permutation, but cannot be sparse.
546: .seealso: PetscSFSetGraph()
547: @*/
548: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
549: {
551: PetscMPIInt rank;
552: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
553: const PetscInt *ilocal;
554: PetscSFNode *roots,*leaves;
558: PetscSFCheckGraphSet(sf,1);
561: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
562: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
564: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
565: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
566: for (i=0; i<maxlocal; i++) {
567: leaves[i].rank = rank;
568: leaves[i].index = i;
569: }
570: for (i=0; i <nroots; i++) {
571: roots[i].rank = -1;
572: roots[i].index = -1;
573: }
574: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
575: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
577: /* Check whether our leaves are sparse */
578: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
579: if (count == nroots) newilocal = NULL;
580: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
581: PetscMalloc1(count,&newilocal);
582: for (i=0,count=0; i<nroots; i++) {
583: if (roots[i].rank >= 0) {
584: newilocal[count] = i;
585: roots[count].rank = roots[i].rank;
586: roots[count].index = roots[i].index;
587: count++;
588: }
589: }
590: }
592: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
593: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
594: PetscFree2(roots,leaves);
595: return(0);
596: }
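/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   invert a forest in which every root has degree exactly 1. Here every rank has n contiguous leaves,
   leaf i referencing root i on the next rank, so the inverse maps each root back to the rank that
   referenced it.

     PetscSF     sf,isf;
     PetscSFNode *remote;
     PetscInt    i,n = 4;
     PetscMPIInt rank,size;

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscMalloc1(n,&remote);
     for (i=0; i<n; i++) {remote[i].rank = (rank+1)%size; remote[i].index = i;}
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraph(sf,n,n,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
     PetscSFCreateInverseSF(sf,&isf);
     PetscSFDestroy(&isf);
     PetscSFDestroy(&sf);
*/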
598: /*@
599: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
601: Collective
603: Input Arguments:
604: + sf - communication object to duplicate
605: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
607: Output Arguments:
608: . newsf - new communication object
610: Level: beginner
612: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
615: {
616: PetscSFType type;
623: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
624: PetscSFGetType(sf,&type);
625: if (type) {PetscSFSetType(*newsf,type);}
626: if (opt == PETSCSF_DUPLICATE_GRAPH) {
627: PetscSFCheckGraphSet(sf,1);
628: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
629: PetscInt nroots,nleaves;
630: const PetscInt *ilocal;
631: const PetscSFNode *iremote;
632: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
633: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
634: } else {
635: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
636: }
637: }
638: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
639: return(0);
640: }
642: /*@C
643: PetscSFGetGraph - Get the graph specifying a parallel star forest
645: Not Collective
647: Input Arguments:
648: . sf - star forest
650: Output Arguments:
651: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
652: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
653: . ilocal - locations of leaves in leafdata buffers
654: - iremote - remote locations of root vertices for each leaf on the current process
656: Notes:
657: We are not currently requiring that the graph be set, thus returning nroots=-1 if it has not been set yet
659: When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
660: want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
662: Level: intermediate
664: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
665: @*/
666: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
667: {
672: if (sf->ops->GetGraph) {
673: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
674: } else {
675: if (nroots) *nroots = sf->nroots;
676: if (nleaves) *nleaves = sf->nleaves;
677: if (ilocal) *ilocal = sf->mine;
678: if (iremote) *iremote = sf->remote;
679: }
680: return(0);
681: }
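/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   inspect the graph of an already existing PetscSF sf (a hypothetical variable). Any output
   argument that is not needed may be passed as NULL.

     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;
       PetscPrintf(PETSC_COMM_SELF,"leaf %D references (rank %D, index %D)\n",leaf,iremote[i].rank,iremote[i].index);
     }
*/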
683: /*@
684: PetscSFGetLeafRange - Get the active leaf ranges
686: Not Collective
688: Input Arguments:
689: . sf - star forest
691: Output Arguments:
692: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
693: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
695: Level: developer
697: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
698: @*/
699: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
700: {
704: PetscSFCheckGraphSet(sf,1);
705: if (minleaf) *minleaf = sf->minleaf;
706: if (maxleaf) *maxleaf = sf->maxleaf;
707: return(0);
708: }
710: /*@C
711: PetscSFView - view a star forest
713: Collective
715: Input Arguments:
716: + sf - star forest
717: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
719: Level: beginner
721: .seealso: PetscSFCreate(), PetscSFSetGraph()
722: @*/
723: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
724: {
725: PetscErrorCode ierr;
726: PetscBool iascii;
727: PetscViewerFormat format;
731: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
734: if (sf->graphset) {PetscSFSetUp(sf);}
735: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
736: if (iascii) {
737: PetscMPIInt rank;
738: PetscInt ii,i,j;
740: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
741: PetscViewerASCIIPushTab(viewer);
742: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
743: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
744: if (!sf->graphset) {
745: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
746: PetscViewerASCIIPopTab(viewer);
747: return(0);
748: }
749: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
750: PetscViewerASCIIPushSynchronized(viewer);
751: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
752: for (i=0; i<sf->nleaves; i++) {
753: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
754: }
755: PetscViewerFlush(viewer);
756: PetscViewerGetFormat(viewer,&format);
757: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
758: PetscMPIInt *tmpranks,*perm;
759: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
760: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
761: for (i=0; i<sf->nranks; i++) perm[i] = i;
762: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
763: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
764: for (ii=0; ii<sf->nranks; ii++) {
765: i = perm[ii];
766: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
767: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
768: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
769: }
770: }
771: PetscFree2(tmpranks,perm);
772: }
773: PetscViewerFlush(viewer);
774: PetscViewerASCIIPopSynchronized(viewer);
775: }
776: PetscViewerASCIIPopTab(viewer);
777: }
778: return(0);
779: }
781: /*@C
782: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
784: Not Collective
786: Input Arguments:
787: . sf - star forest
789: Output Arguments:
790: + nranks - number of ranks referenced by local part
791: . ranks - array of ranks
792: . roffset - offset in rmine/rremote for each rank (length nranks+1)
793: . rmine - concatenated array holding local indices referencing each remote rank
794: - rremote - concatenated array holding remote indices referenced for each remote rank
796: Level: developer
798: .seealso: PetscSFGetLeafRanks()
799: @*/
800: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
801: {
806: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
807: if (sf->ops->GetRootRanks) {
808: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
809: } else {
810: /* The generic implementation */
811: if (nranks) *nranks = sf->nranks;
812: if (ranks) *ranks = sf->ranks;
813: if (roffset) *roffset = sf->roffset;
814: if (rmine) *rmine = sf->rmine;
815: if (rremote) *rremote = sf->rremote;
816: }
817: return(0);
818: }
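/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   after PetscSFSetUp(), walk the per-rank layout of the leaves of a hypothetical sf. For rank
   ranks[r], the leaf indices rmine[roffset[r]..roffset[r+1]-1] reference the root indices stored
   at the same positions of rremote[].

     PetscInt          nranks,r,j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset,*rmine,*rremote;

     PetscSFSetUp(sf);
     PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
     for (r=0; r<nranks; r++) {
       for (j=roffset[r]; j<roffset[r+1]; j++) {
         PetscPrintf(PETSC_COMM_SELF,"leaf %D -> root %D on rank %d\n",rmine[j],rremote[j],ranks[r]);
       }
     }
*/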
820: /*@C
821: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
823: Not Collective
825: Input Arguments:
826: . sf - star forest
828: Output Arguments:
829: + niranks - number of leaf ranks referencing roots on this process
830: . iranks - array of ranks
831: . ioffset - offset in irootloc for each rank (length niranks+1)
832: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
834: Level: developer
836: .seealso: PetscSFGetRootRanks()
837: @*/
838: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
839: {
844: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
845: if (sf->ops->GetLeafRanks) {
846: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
847: } else {
848: PetscSFType type;
849: PetscSFGetType(sf,&type);
850: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
851: }
852: return(0);
853: }
855: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
856: PetscInt i;
857: for (i=0; i<n; i++) {
858: if (needle == list[i]) return PETSC_TRUE;
859: }
860: return PETSC_FALSE;
861: }
863: /*@C
864: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
866: Collective
868: Input Arguments:
869: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
870: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
872: Level: developer
874: .seealso: PetscSFGetRootRanks()
875: @*/
876: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
877: {
878: PetscErrorCode ierr;
879: PetscTable table;
880: PetscTablePosition pos;
881: PetscMPIInt size,groupsize,*groupranks;
882: PetscInt *rcount,*ranks;
883: PetscInt i, irank = -1,orank = -1;
887: PetscSFCheckGraphSet(sf,1);
888: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
889: PetscTableCreate(10,size,&table);
890: for (i=0; i<sf->nleaves; i++) {
891: /* Log 1-based rank */
892: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
893: }
894: PetscTableGetCount(table,&sf->nranks);
895: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
896: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
897: PetscTableGetHeadPosition(table,&pos);
898: for (i=0; i<sf->nranks; i++) {
899: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
900: ranks[i]--; /* Convert back to 0-based */
901: }
902: PetscTableDestroy(&table);
904: /* We expect that dgroup is reliably "small" while nranks could be large */
905: {
906: MPI_Group group = MPI_GROUP_NULL;
907: PetscMPIInt *dgroupranks;
908: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
909: MPI_Group_size(dgroup,&groupsize);
910: PetscMalloc1(groupsize,&dgroupranks);
911: PetscMalloc1(groupsize,&groupranks);
912: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
913: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
914: MPI_Group_free(&group);
915: PetscFree(dgroupranks);
916: }
918: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
919: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
920: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
921: if (InList(ranks[i],groupsize,groupranks)) break;
922: }
923: for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
924: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
925: }
926: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
927: PetscInt tmprank,tmpcount;
929: tmprank = ranks[i];
930: tmpcount = rcount[i];
931: ranks[i] = ranks[sf->ndranks];
932: rcount[i] = rcount[sf->ndranks];
933: ranks[sf->ndranks] = tmprank;
934: rcount[sf->ndranks] = tmpcount;
935: sf->ndranks++;
936: }
937: }
938: PetscFree(groupranks);
939: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
940: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
941: sf->roffset[0] = 0;
942: for (i=0; i<sf->nranks; i++) {
943: PetscMPIIntCast(ranks[i],sf->ranks+i);
944: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
945: rcount[i] = 0;
946: }
947: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
948: /* short circuit */
949: if (orank != sf->remote[i].rank) {
950: /* Search for index of iremote[i].rank in sf->ranks */
951: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
952: if (irank < 0) {
953: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
954: if (irank >= 0) irank += sf->ndranks;
955: }
956: orank = sf->remote[i].rank;
957: }
958: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
959: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
960: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
961: rcount[irank]++;
962: }
963: PetscFree2(rcount,ranks);
964: return(0);
965: }
967: /*@C
968: PetscSFGetGroups - gets incoming and outgoing process groups
970: Collective
972: Input Argument:
973: . sf - star forest
975: Output Arguments:
976: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
977: - outgoing - group of destination processes for outgoing edges (roots that I reference)
979: Level: developer
981: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
982: @*/
983: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
984: {
986: MPI_Group group = MPI_GROUP_NULL;
989: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
990: if (sf->ingroup == MPI_GROUP_NULL) {
991: PetscInt i;
992: const PetscInt *indegree;
993: PetscMPIInt rank,*outranks,*inranks;
994: PetscSFNode *remote;
995: PetscSF bgcount;
997: /* Compute the number of incoming ranks */
998: PetscMalloc1(sf->nranks,&remote);
999: for (i=0; i<sf->nranks; i++) {
1000: remote[i].rank = sf->ranks[i];
1001: remote[i].index = 0;
1002: }
1003: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1004: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1005: PetscSFComputeDegreeBegin(bgcount,&indegree);
1006: PetscSFComputeDegreeEnd(bgcount,&indegree);
1008: /* Enumerate the incoming ranks */
1009: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1010: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1011: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1012: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1013: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1014: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1015: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1016: MPI_Group_free(&group);
1017: PetscFree2(inranks,outranks);
1018: PetscSFDestroy(&bgcount);
1019: }
1020: *incoming = sf->ingroup;
1022: if (sf->outgroup == MPI_GROUP_NULL) {
1023: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1024: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1025: MPI_Group_free(&group);
1026: }
1027: *outgoing = sf->outgroup;
1028: return(0);
1029: }
1031: /*@
1032: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1034: Collective
1036: Input Argument:
1037: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1039: Output Arguments:
1040: . multi - star forest with split roots, such that each root has degree exactly 1
1042: Level: developer
1044: Notes:
1046: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1047: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1048: edge, it is a candidate for future optimization that might involve its removal.
1050: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1051: @*/
1052: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1053: {
1059: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1060: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1061: *multi = sf->multi;
1062: return(0);
1063: }
1064: if (!sf->multi) {
1065: const PetscInt *indegree;
1066: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1067: PetscSFNode *remote;
1068: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1069: PetscSFComputeDegreeBegin(sf,&indegree);
1070: PetscSFComputeDegreeEnd(sf,&indegree);
1071: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1072: inoffset[0] = 0;
1073: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1074: for (i=0; i<maxlocal; i++) outones[i] = 1;
1075: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1076: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1077: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1078: #if 0
1079: #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
1080: for (i=0; i<sf->nroots; i++) {
1081: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1082: }
1083: #endif
1084: #endif
1085: PetscMalloc1(sf->nleaves,&remote);
1086: for (i=0; i<sf->nleaves; i++) {
1087: remote[i].rank = sf->remote[i].rank;
1088: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1089: }
1090: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1091: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1092: if (sf->rankorder) { /* Sort the ranks */
1093: PetscMPIInt rank;
1094: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1095: PetscSFNode *newremote;
1096: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1097: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1098: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1099: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1100: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1101: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1102: /* Sort the incoming ranks at each vertex, build the inverse map */
1103: for (i=0; i<sf->nroots; i++) {
1104: PetscInt j;
1105: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1106: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1107: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1108: }
1109: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1110: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1111: PetscMalloc1(sf->nleaves,&newremote);
1112: for (i=0; i<sf->nleaves; i++) {
1113: newremote[i].rank = sf->remote[i].rank;
1114: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1115: }
1116: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1117: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1118: }
1119: PetscFree3(inoffset,outones,outoffset);
1120: }
1121: *multi = sf->multi;
1122: return(0);
1123: }
1125: /*@C
1126: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1128: Collective
1130: Input Arguments:
1131: + sf - original star forest
1132: . nselected - number of selected roots on this process
1133: - selected - indices of the selected roots on this process
1135: Output Arguments:
1136: . newsf - new star forest
1138: Level: advanced
1140: Note:
1141: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1142: be done by calling PetscSFGetGraph().
1144: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1145: @*/
1146: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1147: {
1148: PetscInt i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1149: const PetscSFNode *iremote;
1150: PetscSFNode *new_iremote;
1151: PetscSF tmpsf;
1152: MPI_Comm comm;
1153: PetscErrorCode ierr;
1157: PetscSFCheckGraphSet(sf,1);
1161: PetscSFSetUp(sf);
1163: /* Remove duplicates from selected[] and put the results in roots[] */
1164: PetscObjectGetComm((PetscObject)sf,&comm);
1165: PetscMalloc1(nselected,&roots);
1166: PetscArraycpy(roots,selected,nselected);
1167: PetscSortedRemoveDupsInt(&nselected,roots);
1168: if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1170: if (sf->ops->CreateEmbeddedSF) {
1171: (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);
1172: } else {
1173: /* A generic version of creating an embedded sf. Note that we call PetscSFSetGraph() twice, which is certainly expensive */
1174: /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1175: PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
1176: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);
1177: PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
1178: PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);
1179: for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1180: PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);
1181: PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);
1182: PetscSFDestroy(&tmpsf);
1184: /* Build newsf with leaves that are still connected */
1185: connected_leaves = 0;
1186: for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1187: PetscMalloc1(connected_leaves,&new_ilocal);
1188: PetscMalloc1(connected_leaves,&new_iremote);
1189: for (i=0, n=0; i<nleaves; ++i) {
1190: if (leafdata[i]) {
1191: new_ilocal[n] = sf->mine ? sf->mine[i] : i;
1192: new_iremote[n].rank = sf->remote[i].rank;
1193: new_iremote[n].index = sf->remote[i].index;
1194: ++n;
1195: }
1196: }
1198: if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1199: PetscSFCreate(comm,newsf);
1200: PetscSFSetFromOptions(*newsf);
1201: PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1202: PetscFree2(rootdata,leafdata);
1203: }
1204: PetscFree(roots);
1205: return(0);
1206: }
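/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   restrict a hypothetical sf (assumed to have at least four local roots) to the edges ending at two
   chosen local roots; the selected indices are given in sorted order, leaves pointing at any other
   root are dropped, and indices are not remapped.

     PetscSF  esf;
     PetscInt selected[2] = {0,3};

     PetscSFCreateEmbeddedSF(sf,2,selected,&esf);
     PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
     PetscSFDestroy(&esf);
*/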
1208: /*@C
1209: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1211: Collective
1213: Input Arguments:
1214: + sf - original star forest
1215: . nselected - number of selected leaves on this process
1216: - selected - indices of the selected leaves on this process
1218: Output Arguments:
1219: . newsf - new star forest
1221: Level: advanced
1223: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1224: @*/
1225: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1226: {
1227: const PetscSFNode *iremote;
1228: PetscSFNode *new_iremote;
1229: const PetscInt *ilocal;
1230: PetscInt i,nroots,*leaves,*new_ilocal;
1231: MPI_Comm comm;
1232: PetscErrorCode ierr;
1236: PetscSFCheckGraphSet(sf,1);
1240: /* Remove duplicates from selected[] and put the results in leaves[] */
1241: PetscObjectGetComm((PetscObject)sf,&comm);
1242: PetscMalloc1(nselected,&leaves);
1243: PetscArraycpy(leaves,selected,nselected);
1244: PetscSortedRemoveDupsInt(&nselected,leaves);
1245: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1247: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1248: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1249: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1250: } else {
1251: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1252: PetscMalloc1(nselected,&new_ilocal);
1253: PetscMalloc1(nselected,&new_iremote);
1254: for (i=0; i<nselected; ++i) {
1255: const PetscInt l = leaves[i];
1256: new_ilocal[i] = ilocal ? ilocal[l] : l;
1257: new_iremote[i].rank = iremote[l].rank;
1258: new_iremote[i].index = iremote[l].index;
1259: }
1260: PetscSFCreate(comm,newsf);
1261: PetscSFSetFromOptions(*newsf);
1262: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1263: }
1264: PetscFree(leaves);
1265: return(0);
1266: }
1268: /*@C
1269: PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1271: Collective on PetscSF
1273: Input Arguments:
1274: + sf - star forest on which to communicate
1275: . unit - data type associated with each node
1276: . rootdata - buffer to broadcast
1277: - op - operation to use for reduction
1279: Output Arguments:
1280: . leafdata - buffer to be reduced with values from each leaf's respective root
1282: Level: intermediate
1284: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1285: @*/
1286: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1287: {
1289: PetscMemType rootmtype,leafmtype;
1293: PetscSFSetUp(sf);
1294: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1295: PetscGetMemType(rootdata,&rootmtype);
1296: PetscGetMemType(leafdata,&leafmtype);
1297: #if defined(PETSC_HAVE_CUDA)
1298: /* Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
1299: To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
1300: But currently this assumption is not enforced in PETSc. To be safe, I do synchronization here. Otherwise, if
1301: we do not sync now and call the Pack kernel directly on the default NULL stream (assuming petsc objects are also
1302: computed on it), we have to sync the NULL stream before calling MPI routines. So it looks like a cudaDeviceSynchronize
1303: is inevitable. We do it now and put pack/unpack kernels on non-NULL streams.
1304: */
1305: if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1306: #endif
1307: (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1308: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1309: return(0);
1310: }
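/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   add the value of each leaf's root into an existing leaf buffer. Here sf is a hypothetical,
   already set-up star forest, nroots and nleaves are hypothetical variables holding its local
   sizes, and the leaves are assumed contiguous (ilocal == NULL).

     PetscInt *rootdata,*leafdata;

     PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
     ... fill rootdata[] and leafdata[] ...
     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
     PetscFree2(rootdata,leafdata);
*/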
1312: /*@C
1313: PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1315: Collective
1317: Input Arguments:
1318: + sf - star forest
1319: . unit - data type
1320: . rootdata - buffer to broadcast
1321: - op - operation to use for reduction
1323: Output Arguments:
1324: . leafdata - buffer to be reduced with values from each leaf's respective root
1326: Level: intermediate
1328: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1329: @*/
1330: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1331: {
1333: PetscMemType rootmtype,leafmtype;
1337: PetscSFSetUp(sf);
1338: PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1339: PetscGetMemType(rootdata,&rootmtype);
1340: PetscGetMemType(leafdata,&leafmtype);
1341: (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1342: PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1343: return(0);
1344: }
1346: /*@C
1347: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1349: Collective
1351: Input Arguments:
1352: + sf - star forest
1353: . unit - data type
1354: . leafdata - values to reduce
1355: - op - reduction operation
1357: Output Arguments:
1358: . rootdata - result of reduction of values from all leaves of each root
1360: Level: intermediate
1362: .seealso: PetscSFBcastBegin()
1363: @*/
1364: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1365: {
1367: PetscMemType rootmtype,leafmtype;
1371: PetscSFSetUp(sf);
1372: PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1373: PetscGetMemType(rootdata,&rootmtype);
1374: PetscGetMemType(leafdata,&leafmtype);
1375: #if defined(PETSC_HAVE_CUDA)
1376: if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1377: #endif
1378: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1379: PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1380: return(0);
1381: }
1383: /*@C
1384: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1386: Collective
1388: Input Arguments:
1389: + sf - star forest
1390: . unit - data type
1391: . leafdata - values to reduce
1392: - op - reduction operation
1394: Output Arguments:
1395: . rootdata - result of reduction of values from all leaves of each root
1397: Level: intermediate
1399: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1400: @*/
1401: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1402: {
1404: PetscMemType rootmtype,leafmtype;
1408: PetscSFSetUp(sf);
1409: PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1410: PetscGetMemType(rootdata,&rootmtype);
1411: PetscGetMemType(leafdata,&leafmtype);
1412: (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1413: PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1414: return(0);
1415: }
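/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   sum contributions from all leaves into their roots. Here sf, nroots and nleaves are hypothetical,
   the leaves are assumed contiguous (ilocal == NULL), and every leaf contributes 1, so each root
   ends up holding its degree.

     PetscInt i,*rootdata,*leafdata;

     PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);
     for (i=0; i<nroots; i++) rootdata[i] = 0;
     for (i=0; i<nleaves; i++) leafdata[i] = 1;
     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);
     PetscFree2(rootdata,leafdata);
*/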
1417: /*@C
1418: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1420: Collective
1422: Input Arguments:
1423: + sf - star forest
1424: . unit - data type
1425: . leafdata - leaf values to use in reduction
1426: - op - operation to use for reduction
1428: Output Arguments:
1429: + rootdata - root values to be updated, input state is seen by first process to perform an update
1430: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1432: Level: advanced
1434: Note:
1435: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1436: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1437: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1438: integers.
1440: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1441: @*/
1442: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1443: {
1445: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1449: PetscSFSetUp(sf);
1450: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1451: PetscGetMemType(rootdata,&rootmtype);
1452: PetscGetMemType(leafdata,&leafmtype);
1453: PetscGetMemType(leafupdate,&leafupdatemtype);
1454: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1455: #if defined(PETSC_HAVE_CUDA)
1456: if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1457: #endif
1458: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1459: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1460: return(0);
1461: }
1463: /*@C
1464: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1466: Collective
1468: Input Arguments:
1469: + sf - star forest
1470: . unit - data type
1471: . leafdata - leaf values to use in reduction
1472: - op - operation to use for reduction
1474: Output Arguments:
1475: + rootdata - root values to be updated, input state is seen by first process to perform an update
1476: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1478: Level: advanced
1480: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1481: @*/
1482: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1483: {
1485: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1489: PetscSFSetUp(sf);
1490: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1491: PetscGetMemType(rootdata,&rootmtype);
1492: PetscGetMemType(leafdata,&leafmtype);
1493: PetscGetMemType(leafupdate,&leafupdatemtype);
1494: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1495: (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1496: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1497: return(0);
1498: }
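/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   hand out consecutive slots at each root, in the same spirit as the usage in PetscSFGetMultiSF()
   above. Each leaf fetches the current counter of its root into leafoffset[] and then adds its
   contribution of 1, so leaves sharing a root receive distinct slots. Here sf, nroots and nleaves
   are hypothetical and the leaves are assumed contiguous (ilocal == NULL).

     PetscInt i,*rootcount,*ones,*leafoffset;

     PetscMalloc3(nroots,&rootcount,nleaves,&ones,nleaves,&leafoffset);
     for (i=0; i<nroots; i++) rootcount[i] = 0;
     for (i=0; i<nleaves; i++) ones[i] = 1;
     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,leafoffset,MPI_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,leafoffset,MPI_SUM);
     PetscFree3(rootcount,ones,leafoffset);
*/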
1500: /*@C
1501: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1503: Collective
1505: Input Arguments:
1506: . sf - star forest
1508: Output Arguments:
1509: . degree - degree of each root vertex
1511: Level: advanced
1513: Notes:
1514: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1516: .seealso: PetscSFGatherBegin()
1517: @*/
1518: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1519: {
1524: PetscSFCheckGraphSet(sf,1);
1526: if (!sf->degreeknown) {
1527: PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1528: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1529: PetscMalloc1(nroots,&sf->degree);
1530: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1531: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1532: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1533: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1534: }
1535: *degree = NULL;
1536: return(0);
1537: }
1539: /*@C
1540: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1542: Collective
1544: Input Arguments:
1545: . sf - star forest
1547: Output Arguments:
1548: . degree - degree of each root vertex
1550: Level: developer
1552: Notes:
1553: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1555: .seealso:
1556: @*/
1557: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1558: {
1563: PetscSFCheckGraphSet(sf,1);
1565: if (!sf->degreeknown) {
1566: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1567: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);
1568: PetscFree(sf->degreetmp);
1569: sf->degreeknown = PETSC_TRUE;
1570: }
1571: *degree = sf->degree;
1572: return(0);
1573: }
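/*
   Usage sketch (illustrative, not part of the PETSc sources; error checking omitted for brevity):
   count how many leaves reference each local root of a hypothetical sf. The degree array is owned
   by the PetscSF and must not be freed by the caller.

     PetscInt       i,nroots;
     const PetscInt *degree;

     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) {
       PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);
     }
*/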
1576: /*@C
1577: PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1578: Each multi-root is assigned the index of the corresponding original root.
1580: Collective
1582: Input Arguments:
1583: + sf - star forest
1584: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1586: Output Arguments:
1587: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1588: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1590: Level: developer
1592: Notes:
1593: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1595: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1596: @*/
1597: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1598: {
1599: PetscSF msf;
1600: PetscInt i, j, k, nroots, nmroots;
1601: PetscErrorCode ierr;
1605: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1609: PetscSFGetMultiSF(sf,&msf);
1610: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1611: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1612: for (i=0,j=0,k=0; i<nroots; i++) {
1613: if (!degree[i]) continue;
1614: for (j=0; j<degree[i]; j++,k++) {
1615: (*multiRootsOrigNumbering)[k] = i;
1616: }
1617: }
1618: #if defined(PETSC_USE_DEBUG)
1619: if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1620: #endif
1621: if (nMultiRoots) *nMultiRoots = nmroots;
1622: return(0);
1623: }
1625: /*@C
1626: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1628: Collective
1630: Input Arguments:
1631: + sf - star forest
1632: . unit - data type
1633: - leafdata - leaf data to gather to roots
1635: Output Argument:
1636: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1638: Level: intermediate
1640: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1641: @*/
1642: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1643: {
1645: PetscSF multi;
1649: PetscSFSetUp(sf);
1650: PetscSFGetMultiSF(sf,&multi);
1651: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1652: return(0);
1653: }
1655: /*@C
1656: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1658: Collective
1660: Input Arguments:
1661: + sf - star forest
1662: . unit - data type
1663: - leafdata - leaf data to gather to roots
1665: Output Argument:
1666: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1668: Level: intermediate
1670: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1671: @*/
1672: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1673: {
1675: PetscSF multi;
1679: PetscSFSetUp(sf);
1680: PetscSFGetMultiSF(sf,&multi);
1681: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1682: return(0);
1683: }
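/* A minimal usage sketch (not from this file): gather all leaf values into per-root multi-root
   storage. sf is assumed to be an already set-up PetscSF; leafdata and the other names are
   hypothetical.

     const PetscInt *degree;
     PetscInt       nroots,i,nmulti = 0;
     PetscScalar    *multirootdata;

     PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
     PetscSFComputeDegreeBegin(sf,&degree);
     PetscSFComputeDegreeEnd(sf,&degree);
     for (i=0; i<nroots; i++) nmulti += degree[i];   (one multi-root slot per incoming leaf)
     PetscMalloc1(nmulti,&multirootdata);
     PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
     PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
*/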
1685: /*@C
1686: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1688: Collective
1690: Input Arguments:
1691: + sf - star forest
1692: . unit - data type
1693: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1695: Output Argument:
1696: . leafdata - leaf data to be updated with personal data from each respective root
1698: Level: intermediate
1700: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
1701: @*/
1702: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1703: {
1705: PetscSF multi;
1709: PetscSFSetUp(sf);
1710: PetscSFGetMultiSF(sf,&multi);
1711: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1712: return(0);
1713: }
1715: /*@C
1716: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1718: Collective
1720: Input Arguments:
1721: + sf - star forest
1722: . unit - data type
1723: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1725: Output Argument:
1726: . leafdata - leaf data to be updated with personal data from each respective root
1728: Level: intermediate
1730: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
1731: @*/
1732: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1733: {
1735: PetscSF multi;
1739: PetscSFSetUp(sf);
1740: PetscSFGetMultiSF(sf,&multi);
1741: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1742: return(0);
1743: }
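/* A minimal usage sketch (not from this file): send one personalized unit from each multi-root
   slot back to its leaf, e.g. to return data produced at the roots after a gather. multirootdata
   is assumed to be laid out as in the gather sketch above; leafdata is hypothetical and sized by
   the leaf range.

     PetscScalar *leafdata;
     PetscInt    maxleaf;

     PetscSFGetLeafRange(sf,NULL,&maxleaf);
     PetscMalloc1(maxleaf+1,&leafdata);
     PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
     PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
*/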
1745: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1746: {
1747: #if defined(PETSC_USE_DEBUG)
1748: PetscInt i, n, nleaves;
1749: const PetscInt *ilocal = NULL;
1750: PetscHSetI seen;
1751: PetscErrorCode ierr;
1754: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1755: PetscHSetICreate(&seen);
1756: for (i = 0; i < nleaves; i++) {
1757: const PetscInt leaf = ilocal ? ilocal[i] : i;
1758: PetscHSetIAdd(seen,leaf);
1759: }
1760: PetscHSetIGetSize(seen,&n);
1761: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1762: PetscHSetIDestroy(&seen);
1763: return(0);
1764: #else
1766: return(0);
1767: #endif
1768: }
1769: /*@
1770: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1772: Input Parameters:
1773: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1774: - sfB - The second PetscSF, whose number of roots must be equal to the number of leaves of sfA on each process
1776: Output Parameters:
1777: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed by a Bcast on sfB
1779: Level: developer
1781: Notes:
1782: For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is
1783: checked in debug mode.
1785: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1786: @*/
1787: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1788: {
1789: PetscErrorCode ierr;
1790: MPI_Comm comm;
1791: const PetscSFNode *remotePointsA,*remotePointsB;
1792: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1793: const PetscInt *localPointsA,*localPointsB,*localPointsBA;
1794: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;
1798: PetscSFCheckGraphSet(sfA,1);
1800: PetscSFCheckGraphSet(sfB,2);
1802: PetscObjectGetComm((PetscObject)sfA,&comm);
1803: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1804: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1805: PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1806: if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
1807: if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1808: /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
1809: mode, check properly. */
1810: PetscSFCheckLeavesUnique_Private(sfA);
1811: if (localPointsA) {
1812: /* Local space is a dense permutation of the identity. Reorder the remote points accordingly */
1813: PetscMalloc1(numLeavesA,&reorderedRemotePointsA);
1814: for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
1815: remotePointsA = reorderedRemotePointsA;
1816: }
1817: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1818: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1819: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1820: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1821: PetscFree(reorderedRemotePointsA);
1823: /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
1824: PetscSFCheckLeavesUnique_Private(sfB);
1825: if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
1826: localPointsBA = NULL;
1827: remotePointsBA = leafdataB;
1828: } else {
1829: localPointsBA = localPointsB;
1830: PetscMalloc1(numLeavesB,&remotePointsBA);
1831: for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
1832: PetscFree(leafdataB);
1833: }
1834: PetscSFCreate(comm, sfBA);
1835: PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1836: return(0);
1837: }
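/* A minimal usage sketch (not from this file): compose sfA and sfB so that a single Bcast on the
   result carries root data of sfA through sfA and then through sfB to the leaves of sfB. sfA,
   sfB, rootdataA and leafdataB are hypothetical and must satisfy the restrictions documented
   above.

     PetscSF sfBA;

     PetscSFCompose(sfA,sfB,&sfBA);
     PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
     PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB);
     PetscSFDestroy(&sfBA);
*/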
1839: /*@
1840: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1842: Input Parameters:
1843: + sfA - The first PetscSF, whose local space may be a permutation but cannot be sparse.
1844: - sfB - The second PetscSF, whose number of leaves must be equal to the number of leaves of sfA on each process. All roots must have degree 1.
1846: Output Parameters:
1847: . sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed by a Bcast on the inverse of sfB
1849: Level: developer
1851: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
1852: @*/
1853: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1854: {
1855: PetscErrorCode ierr;
1856: MPI_Comm comm;
1857: const PetscSFNode *remotePointsA,*remotePointsB;
1858: PetscSFNode *remotePointsBA;
1859: const PetscInt *localPointsA,*localPointsB;
1860: PetscInt numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;
1864: PetscSFCheckGraphSet(sfA,1);
1866: PetscSFCheckGraphSet(sfB,2);
1868: PetscObjectGetComm((PetscObject) sfA, &comm);
1869: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1870: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1871: PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);
1872: if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF cannot have a sparse local space");
1873: if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
1874: PetscMalloc1(numRootsB,&remotePointsBA);
1875: PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1876: PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);
1877: PetscSFCreate(comm,sfBA);
1878: PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);
1879: return(0);
1880: }
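/* A minimal usage sketch (not from this file): the composite built by PetscSFComposeInverse()
   has the roots of sfA as its roots and the roots of sfB as its leaves, so a Bcast on it moves
   data from sfA's roots to sfB's roots. sfA, sfB, rootdataA and rootdataB are hypothetical.

     PetscSF sfBA;

     PetscSFComposeInverse(sfA,sfB,&sfBA);
     PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
     PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,rootdataB);
     PetscSFDestroy(&sfBA);
*/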
1882: /*
1883: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1885: Input Parameters:
1886: . sf - The global PetscSF
1888: Output Parameters:
1889: . out - The local PetscSF
1890: */
1891: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1892: {
1893: MPI_Comm comm;
1894: PetscMPIInt myrank;
1895: const PetscInt *ilocal;
1896: const PetscSFNode *iremote;
1897: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
1898: PetscSFNode *liremote;
1899: PetscSF lsf;
1900: PetscErrorCode ierr;
1904: if (sf->ops->CreateLocalSF) {
1905: (*sf->ops->CreateLocalSF)(sf,out);
1906: } else {
1907: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1908: PetscObjectGetComm((PetscObject)sf,&comm);
1909: MPI_Comm_rank(comm,&myrank);
1911: /* Find out local edges and build a local SF */
1912: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1913: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1914: PetscMalloc1(lnleaves,&lilocal);
1915: PetscMalloc1(lnleaves,&liremote);
1917: for (i=j=0; i<nleaves; i++) {
1918: if (iremote[i].rank == (PetscInt)myrank) {
1919: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1920: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
1921: liremote[j].index = iremote[i].index;
1922: j++;
1923: }
1924: }
1925: PetscSFCreate(PETSC_COMM_SELF,&lsf);
1926: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
1927: PetscSFSetUp(lsf);
1928: *out = lsf;
1929: }
1930: return(0);
1931: }
1933: /* Similar to PetscSFBcastBegin()/PetscSFBcastEnd(), but only broadcasts to leaves on rank 0 */
1934: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1935: {
1936: PetscErrorCode ierr;
1937: PetscMemType rootmtype,leafmtype;
1941: PetscSFSetUp(sf);
1942: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1943: PetscGetMemType(rootdata,&rootmtype);
1944: PetscGetMemType(leafdata,&leafmtype);
1945: if (sf->ops->BcastToZero) {
1946: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
1947: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
1948: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1949: return(0);
1950: }