Actual source code: sf.c
petsc-3.13.6 2020-09-29
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petscctable.h>
5: #if defined(PETSC_USE_DEBUG)
6: # define PetscSFCheckGraphSet(sf,arg) do { \
7: if (PetscUnlikely(!(sf)->graphset)) \
8: SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
9: } while (0)
10: #else
11: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
12: #endif
14: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
16: /*@
17: PetscSFCreate - create a star forest communication context
19: Collective
21: Input Arguments:
22: . comm - communicator on which the star forest will operate
24: Output Arguments:
25: . sf - new star forest context
27: Options Database Keys:
28: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (default)
29: . -sf_type window - Use MPI-3 one-sided window for communication
30: - -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
32: Level: intermediate
34: Notes:
35: When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
36: or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37: SFs are optimized and have better performance than general SFs.
39: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40: @*/
41: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42: {
44: PetscSF b;
48: PetscSFInitializePackage();
50: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
52: b->nroots = -1;
53: b->nleaves = -1;
54: b->minleaf = PETSC_MAX_INT;
55: b->maxleaf = PETSC_MIN_INT;
56: b->nranks = -1;
57: b->rankorder = PETSC_TRUE;
58: b->ingroup = MPI_GROUP_NULL;
59: b->outgroup = MPI_GROUP_NULL;
60: b->graphset = PETSC_FALSE;
62: *sf = b;
63: return(0);
64: }
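/* Editor's note: the sketch below is not part of the petsc-3.13.6 source; it is guarded out of the
   build with #if 0 and only illustrates the Create/SetGraph/SetUp/Destroy life cycle documented
   above. The ring-shaped graph is an arbitrary choice, and error checking is omitted to match the
   style of this listing. */
#if 0
static PetscErrorCode ExampleSFLifeCycle(MPI_Comm comm)
{
  PetscMPIInt rank,size;
  PetscSFNode remote;
  PetscSF     sf;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  /* One root and one leaf per rank; the leaf references root 0 on the next rank (a ring) */
  remote.rank  = (rank+1)%size;
  remote.index = 0;
  PetscSFCreate(comm,&sf);
  PetscSFSetFromOptions(sf);   /* honor -sf_type basic|window|neighbor, etc. */
  PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
  PetscSFSetUp(sf);
  PetscSFDestroy(&sf);
  return(0);
}
#endif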
66: /*@
67: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
69: Collective
71: Input Arguments:
72: . sf - star forest
74: Level: advanced
76: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77: @*/
78: PetscErrorCode PetscSFReset(PetscSF sf)
79: {
84: if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
85: sf->nroots = -1;
86: sf->nleaves = -1;
87: sf->minleaf = PETSC_MAX_INT;
88: sf->maxleaf = PETSC_MIN_INT;
89: sf->mine = NULL;
90: sf->remote = NULL;
91: sf->graphset = PETSC_FALSE;
92: PetscFree(sf->mine_alloc);
93: PetscFree(sf->remote_alloc);
94: sf->nranks = -1;
95: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
96: #if defined(PETSC_HAVE_CUDA)
97: {
98: PetscInt i;
99: for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
100: }
101: #endif
102: sf->degreeknown = PETSC_FALSE;
103: PetscFree(sf->degree);
104: if (sf->ingroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
105: if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
106: PetscSFDestroy(&sf->multi);
107: PetscLayoutDestroy(&sf->map);
108: sf->setupcalled = PETSC_FALSE;
109: return(0);
110: }
112: /*@C
113: PetscSFSetType - Set the PetscSF communication implementation
115: Collective on PetscSF
117: Input Parameters:
118: + sf - the PetscSF context
119: - type - a known method
121: Options Database Key:
122: . -sf_type <type> - Sets the method; use -help for a list
123: of available methods (for instance, window, basic, neighbor)
125: Notes:
126: See "include/petscsf.h" for available methods (for instance)
127: + PETSCSFWINDOW - MPI-2/3 one-sided
128: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
130: Level: intermediate
132: .seealso: PetscSFType, PetscSFCreate()
133: @*/
134: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
135: {
136: PetscErrorCode ierr,(*r)(PetscSF);
137: PetscBool match;
143: PetscObjectTypeCompare((PetscObject)sf,type,&match);
144: if (match) return(0);
146: PetscFunctionListFind(PetscSFList,type,&r);
147: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
148: /* Destroy the previous PetscSF implementation context */
149: if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
150: PetscMemzero(sf->ops,sizeof(*sf->ops));
151: PetscObjectChangeTypeName((PetscObject)sf,type);
152: (*r)(sf);
153: return(0);
154: }
156: /*@C
157: PetscSFGetType - Get the PetscSF communication implementation
159: Not Collective
161: Input Parameter:
162: . sf - the PetscSF context
164: Output Parameter:
165: . type - the PetscSF type name
167: Level: intermediate
169: .seealso: PetscSFSetType(), PetscSFCreate()
170: @*/
171: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
172: {
176: *type = ((PetscObject)sf)->type_name;
177: return(0);
178: }
180: /*@
181: PetscSFDestroy - destroy star forest
183: Collective
185: Input Arguments:
186: . sf - address of star forest
188: Level: intermediate
190: .seealso: PetscSFCreate(), PetscSFReset()
191: @*/
192: PetscErrorCode PetscSFDestroy(PetscSF *sf)
193: {
197: if (!*sf) return(0);
199: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
200: PetscSFReset(*sf);
201: if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
202: PetscHeaderDestroy(sf);
203: return(0);
204: }
206: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
207: {
208: #if defined(PETSC_USE_DEBUG)
209: PetscInt i, nleaves;
210: PetscMPIInt size;
211: const PetscInt *ilocal;
212: const PetscSFNode *iremote;
213: PetscErrorCode ierr;
216: if (!sf->graphset) return(0);
217: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
218: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
219: for (i = 0; i < nleaves; i++) {
220: const PetscInt rank = iremote[i].rank;
221: const PetscInt remote = iremote[i].index;
222: const PetscInt leaf = ilocal ? ilocal[i] : i;
223: if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
224: if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
225: if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
226: }
227: return(0);
228: #else
230: return(0);
231: #endif
232: }
234: /*@
235: PetscSFSetUp - set up communication structures
237: Collective
239: Input Arguments:
240: . sf - star forest communication object
242: Level: beginner
244: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
245: @*/
246: PetscErrorCode PetscSFSetUp(PetscSF sf)
247: {
252: PetscSFCheckGraphSet(sf,1);
253: if (sf->setupcalled) return(0);
254: PetscSFCheckGraphValid_Private(sf);
255: sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
256: if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);}
257: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
258: if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
259: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
260: sf->setupcalled = PETSC_TRUE;
261: return(0);
262: }
264: /*@
265: PetscSFSetFromOptions - set PetscSF options using the options database
267: Logically Collective
269: Input Arguments:
270: . sf - star forest
272: Options Database Keys:
273: + -sf_type - implementation type, see PetscSFSetType()
274: . -sf_rank_order - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
275: . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default CUDA stream. SF will also
276: use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
277: If true, this option only works with -use_gpu_aware_mpi 1.
278: - -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
279: If true, this option only works with -use_gpu_aware_mpi 1.
281: Level: intermediate
282: @*/
283: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
284: {
285: PetscSFType deft;
286: char type[256];
288: PetscBool flg;
292: PetscObjectOptionsBegin((PetscObject)sf);
293: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
294: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
295: PetscSFSetType(sf,flg ? type : deft);
296: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
298: #if defined(PETSC_HAVE_CUDA)
299: sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
300: PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);
301: sf->use_stream_aware_mpi = PETSC_FALSE;
302: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
303: #endif
304: if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
305: PetscOptionsEnd();
306: return(0);
307: }
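/* Editor's note: illustrative sketch, not part of the original file and excluded from the build with
   #if 0. It shows the options-driven setup path documented above; a run-time option such as
   "-sf_type neighbor -sf_rank_order 0" then selects the implementation without code changes. */
#if 0
static PetscErrorCode ExampleConfigureFromOptions(MPI_Comm comm,PetscSF *sf)
{
  PetscSFCreate(comm,sf);
  PetscSFSetFromOptions(*sf);  /* reads -sf_type, -sf_rank_order, and the CUDA-related options */
  return(0);
}
#endif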
309: /*@
310: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
312: Logically Collective
314: Input Arguments:
315: + sf - star forest
316: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
318: Level: advanced
320: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
321: @*/
322: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
323: {
327: if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
328: sf->rankorder = flg;
329: return(0);
330: }
332: /*@
333: PetscSFSetGraph - Set a parallel star forest
335: Collective
337: Input Arguments:
338: + sf - star forest
339: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
340: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
341: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
342: during setup in debug mode)
343: . localmode - copy mode for ilocal
344: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
345: during setup in debug mode)
346: - remotemode - copy mode for iremote
348: Level: intermediate
350: Notes:
351: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
353: Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
354: encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
355: needed.
357: Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
358: indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
360: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
361: @*/
362: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
363: {
370: if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
371: if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
373: if (sf->nroots >= 0) { /* Reset only if graph already set */
374: PetscSFReset(sf);
375: }
377: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
379: sf->nroots = nroots;
380: sf->nleaves = nleaves;
382: if (nleaves && ilocal) {
383: PetscInt i;
384: PetscInt minleaf = PETSC_MAX_INT;
385: PetscInt maxleaf = PETSC_MIN_INT;
386: int contiguous = 1;
387: for (i=0; i<nleaves; i++) {
388: minleaf = PetscMin(minleaf,ilocal[i]);
389: maxleaf = PetscMax(maxleaf,ilocal[i]);
390: contiguous &= (ilocal[i] == i);
391: }
392: sf->minleaf = minleaf;
393: sf->maxleaf = maxleaf;
394: if (contiguous) {
395: if (localmode == PETSC_OWN_POINTER) {
396: PetscFree(ilocal);
397: }
398: ilocal = NULL;
399: }
400: } else {
401: sf->minleaf = 0;
402: sf->maxleaf = nleaves - 1;
403: }
405: if (ilocal) {
406: switch (localmode) {
407: case PETSC_COPY_VALUES:
408: PetscMalloc1(nleaves,&sf->mine_alloc);
409: PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
410: sf->mine = sf->mine_alloc;
411: break;
412: case PETSC_OWN_POINTER:
413: sf->mine_alloc = (PetscInt*)ilocal;
414: sf->mine = sf->mine_alloc;
415: break;
416: case PETSC_USE_POINTER:
417: sf->mine_alloc = NULL;
418: sf->mine = (PetscInt*)ilocal;
419: break;
420: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
421: }
422: }
424: switch (remotemode) {
425: case PETSC_COPY_VALUES:
426: PetscMalloc1(nleaves,&sf->remote_alloc);
427: PetscArraycpy(sf->remote_alloc,iremote,nleaves);
428: sf->remote = sf->remote_alloc;
429: break;
430: case PETSC_OWN_POINTER:
431: sf->remote_alloc = (PetscSFNode*)iremote;
432: sf->remote = sf->remote_alloc;
433: break;
434: case PETSC_USE_POINTER:
435: sf->remote_alloc = NULL;
436: sf->remote = (PetscSFNode*)iremote;
437: break;
438: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
439: }
441: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
442: sf->graphset = PETSC_TRUE;
443: return(0);
444: }
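/* Editor's note: illustrative sketch (not in the original file, excluded with #if 0) of building the
   ilocal/iremote arrays passed to PetscSFSetGraph(). Every rank attaches two leaves, stored at
   locations 3 and 5 of its leafdata buffer, to roots 0 and 1 owned by rank 0; the sizes are made up
   and error checking is omitted to match the style of this listing. */
#if 0
static PetscErrorCode ExampleSetGraph(PetscSF sf)
{
  PetscInt    *ilocal;
  PetscSFNode *iremote;
  PetscInt    i,nroots = 2,nleaves = 2;

  PetscMalloc1(nleaves,&ilocal);
  PetscMalloc1(nleaves,&iremote);
  for (i=0; i<nleaves; i++) {
    ilocal[i]        = 3 + 2*i;  /* leaves live at offsets 3 and 5 of the local leafdata */
    iremote[i].rank  = 0;        /* every leaf references a root owned by rank 0 */
    iremote[i].index = i;
  }
  /* PETSC_OWN_POINTER hands the arrays to the SF, which frees them in PetscSFReset() */
  PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
  return(0);
}
#endif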
446: /*@
447: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
449: Collective
451: Input Parameters:
452: + sf - The PetscSF
453: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
454: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
456: Notes:
457: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
458: n and N are the local and global sizes of x, respectively.
460: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
461: sequential vectors y on all ranks.
463: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
464: sequential vector y on rank 0.
466: In the above cases, entries of x are roots and entries of y are leaves.
468: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
469: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
470: of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
471: not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
472: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
474: In this case, roots and leaves are symmetric.
476: Level: intermediate
477: @*/
478: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
479: {
480: MPI_Comm comm;
481: PetscInt n,N,res[2];
482: PetscMPIInt rank,size;
483: PetscSFType type;
487: PetscObjectGetComm((PetscObject)sf, &comm);
488: if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
489: MPI_Comm_rank(comm,&rank);
490: MPI_Comm_size(comm,&size);
492: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
493: type = PETSCSFALLTOALL;
494: PetscLayoutCreate(comm,&sf->map);
495: PetscLayoutSetLocalSize(sf->map,size);
496: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
497: PetscLayoutSetUp(sf->map);
498: } else {
499: PetscLayoutGetLocalSize(map,&n);
500: PetscLayoutGetSize(map,&N);
501: res[0] = n;
502: res[1] = -n;
503: /* Check if n is the same over all ranks so that we can optimize it */
504: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
505: if (res[0] == -res[1]) { /* same n */
506: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
507: } else {
508: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
509: }
510: PetscLayoutReference(map,&sf->map);
511: }
512: PetscSFSetType(sf,type);
514: sf->pattern = pattern;
515: sf->mine = NULL; /* Contiguous */
517: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
518: Also set other easy stuff.
519: */
520: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
521: sf->nleaves = N;
522: sf->nroots = n;
523: sf->nranks = size;
524: sf->minleaf = 0;
525: sf->maxleaf = N - 1;
526: } else if (pattern == PETSCSF_PATTERN_GATHER) {
527: sf->nleaves = rank ? 0 : N;
528: sf->nroots = n;
529: sf->nranks = rank ? 0 : size;
530: sf->minleaf = 0;
531: sf->maxleaf = rank ? -1 : N - 1;
532: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
533: sf->nleaves = size;
534: sf->nroots = size;
535: sf->nranks = size;
536: sf->minleaf = 0;
537: sf->maxleaf = size - 1;
538: }
539: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
540: sf->graphset = PETSC_TRUE;
541: return(0);
542: }
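/* Editor's note: illustrative sketch (not part of the original source, excluded with #if 0) of the
   PETSCSF_PATTERN_ALLGATHER usage described above: a distributed array x of local size n is
   broadcast into a full-length local copy y on every rank. Names and sizes are placeholders, and
   error checking is omitted to match the style of this listing. */
#if 0
static PetscErrorCode ExampleAllgatherSF(MPI_Comm comm,PetscInt n,const PetscScalar *x,PetscScalar *y)
{
  PetscLayout map;
  PetscSF     sf;

  PetscLayoutCreate(comm,&map);
  PetscLayoutSetLocalSize(map,n);
  PetscLayoutSetUp(map);
  PetscSFCreate(comm,&sf);
  PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
  /* Roots are the local entries of x; leaves are the N entries of y on every rank */
  PetscSFBcastBegin(sf,MPIU_SCALAR,x,y);
  PetscSFBcastEnd(sf,MPIU_SCALAR,x,y);
  PetscSFDestroy(&sf);
  PetscLayoutDestroy(&map);
  return(0);
}
#endif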
544: /*@
545: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
547: Collective
549: Input Arguments:
551: . sf - star forest to invert
553: Output Arguments:
554: . isf - inverse of sf
555: Level: advanced
557: Notes:
558: All roots must have degree 1.
560: The local space may be a permutation, but cannot be sparse.
562: .seealso: PetscSFSetGraph()
563: @*/
564: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
565: {
567: PetscMPIInt rank;
568: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
569: const PetscInt *ilocal;
570: PetscSFNode *roots,*leaves;
574: PetscSFCheckGraphSet(sf,1);
577: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
578: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
580: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
581: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
582: for (i=0; i<maxlocal; i++) {
583: leaves[i].rank = rank;
584: leaves[i].index = i;
585: }
586: for (i=0; i <nroots; i++) {
587: roots[i].rank = -1;
588: roots[i].index = -1;
589: }
590: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
591: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);
593: /* Check whether our leaves are sparse */
594: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
595: if (count == nroots) newilocal = NULL;
596: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
597: PetscMalloc1(count,&newilocal);
598: for (i=0,count=0; i<nroots; i++) {
599: if (roots[i].rank >= 0) {
600: newilocal[count] = i;
601: roots[count].rank = roots[i].rank;
602: roots[count].index = roots[i].index;
603: count++;
604: }
605: }
606: }
608: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
609: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
610: PetscFree2(roots,leaves);
611: return(0);
612: }
614: /*@
615: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
617: Collective
619: Input Arguments:
620: + sf - communication object to duplicate
621: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
623: Output Arguments:
624: . newsf - new communication object
626: Level: beginner
628: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
629: @*/
630: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
631: {
632: PetscSFType type;
639: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
640: PetscSFGetType(sf,&type);
641: if (type) {PetscSFSetType(*newsf,type);}
642: if (opt == PETSCSF_DUPLICATE_GRAPH) {
643: PetscSFCheckGraphSet(sf,1);
644: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
645: PetscInt nroots,nleaves;
646: const PetscInt *ilocal;
647: const PetscSFNode *iremote;
648: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
649: PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
650: } else {
651: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
652: }
653: }
654: if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
655: return(0);
656: }
658: /*@C
659: PetscSFGetGraph - Get the graph specifying a parallel star forest
661: Not Collective
663: Input Arguments:
664: . sf - star forest
666: Output Arguments:
667: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
668: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
669: . ilocal - locations of leaves in leafdata buffers
670: - iremote - remote locations of root vertices for each leaf on the current process
672: Notes:
673: We are not currently requiring that the graph be set, thus nroots is returned as -1 if it has not been set yet
675: When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
676: want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
678: Level: intermediate
680: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
681: @*/
682: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
683: {
688: if (sf->ops->GetGraph) {
689: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
690: } else {
691: if (nroots) *nroots = sf->nroots;
692: if (nleaves) *nleaves = sf->nleaves;
693: if (ilocal) *ilocal = sf->mine;
694: if (iremote) *iremote = sf->remote;
695: }
696: return(0);
697: }
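/* Editor's note: illustrative sketch (not in the original file, excluded with #if 0) showing how a
   caller might walk the arrays returned by PetscSFGetGraph(); note that ilocal may be NULL, in which
   case leaf i is stored at location i of the leafdata buffer. */
#if 0
static PetscErrorCode ExampleWalkGraph(PetscSF sf)
{
  PetscInt          i,nroots,nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
  for (i=0; i<nleaves; i++) {
    PetscInt leaf = ilocal ? ilocal[i] : i;
    PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (rank %D, root %D)\n",leaf,iremote[i].rank,iremote[i].index);
  }
  return(0);
}
#endif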
699: /*@
700: PetscSFGetLeafRange - Get the active leaf ranges
702: Not Collective
704: Input Arguments:
705: . sf - star forest
707: Output Arguments:
708: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
709: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
711: Level: developer
713: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
714: @*/
715: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
716: {
720: PetscSFCheckGraphSet(sf,1);
721: if (minleaf) *minleaf = sf->minleaf;
722: if (maxleaf) *maxleaf = sf->maxleaf;
723: return(0);
724: }
726: /*@C
727: PetscSFViewFromOptions - View a star forest based on options in the options database
729: Collective on PetscSF
731: Input Parameters:
732: + A - the star forest
733: . obj - Optional object that provides the prefix for the option name
734: - name - command line option
736: Level: intermediate
737: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
738: @*/
739: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
740: {
745: PetscObjectViewFromOptions((PetscObject)A,obj,name);
746: return(0);
747: }
749: /*@C
750: PetscSFView - view a star forest
752: Collective
754: Input Arguments:
755: + sf - star forest
756: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
758: Level: beginner
760: .seealso: PetscSFCreate(), PetscSFSetGraph()
761: @*/
762: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
763: {
764: PetscErrorCode ierr;
765: PetscBool iascii;
766: PetscViewerFormat format;
770: if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
773: if (sf->graphset) {PetscSFSetUp(sf);}
774: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
775: if (iascii) {
776: PetscMPIInt rank;
777: PetscInt ii,i,j;
779: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
780: PetscViewerASCIIPushTab(viewer);
781: if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
782: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
783: if (!sf->graphset) {
784: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
785: PetscViewerASCIIPopTab(viewer);
786: return(0);
787: }
788: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
789: PetscViewerASCIIPushSynchronized(viewer);
790: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
791: for (i=0; i<sf->nleaves; i++) {
792: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
793: }
794: PetscViewerFlush(viewer);
795: PetscViewerGetFormat(viewer,&format);
796: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
797: PetscMPIInt *tmpranks,*perm;
798: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
799: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
800: for (i=0; i<sf->nranks; i++) perm[i] = i;
801: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
802: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
803: for (ii=0; ii<sf->nranks; ii++) {
804: i = perm[ii];
805: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
806: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
807: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
808: }
809: }
810: PetscFree2(tmpranks,perm);
811: }
812: PetscViewerFlush(viewer);
813: PetscViewerASCIIPopSynchronized(viewer);
814: }
815: PetscViewerASCIIPopTab(viewer);
816: }
817: return(0);
818: }
820: /*@C
821: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
823: Not Collective
825: Input Arguments:
826: . sf - star forest
828: Output Arguments:
829: + nranks - number of ranks referenced by local part
830: . ranks - array of ranks
831: . roffset - offset in rmine/rremote for each rank (length nranks+1)
832: . rmine - concatenated array holding local indices referencing each remote rank
833: - rremote - concatenated array holding remote indices referenced for each remote rank
835: Level: developer
837: .seealso: PetscSFGetLeafRanks()
838: @*/
839: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
840: {
845: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
846: if (sf->ops->GetRootRanks) {
847: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
848: } else {
849: /* The generic implementation */
850: if (nranks) *nranks = sf->nranks;
851: if (ranks) *ranks = sf->ranks;
852: if (roffset) *roffset = sf->roffset;
853: if (rmine) *rmine = sf->rmine;
854: if (rremote) *rremote = sf->rremote;
855: }
856: return(0);
857: }
859: /*@C
860: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
862: Not Collective
864: Input Arguments:
865: . sf - star forest
867: Output Arguments:
868: + niranks - number of leaf ranks referencing roots on this process
869: . iranks - array of ranks
870: . ioffset - offset in irootloc for each rank (length niranks+1)
871: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
873: Level: developer
875: .seealso: PetscSFGetRootRanks()
876: @*/
877: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
878: {
883: if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
884: if (sf->ops->GetLeafRanks) {
885: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
886: } else {
887: PetscSFType type;
888: PetscSFGetType(sf,&type);
889: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
890: }
891: return(0);
892: }
894: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
895: PetscInt i;
896: for (i=0; i<n; i++) {
897: if (needle == list[i]) return PETSC_TRUE;
898: }
899: return PETSC_FALSE;
900: }
902: /*@C
903: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
905: Collective
907: Input Arguments:
908: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
909: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
911: Level: developer
913: .seealso: PetscSFGetRootRanks()
914: @*/
915: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
916: {
917: PetscErrorCode ierr;
918: PetscTable table;
919: PetscTablePosition pos;
920: PetscMPIInt size,groupsize,*groupranks;
921: PetscInt *rcount,*ranks;
922: PetscInt i, irank = -1,orank = -1;
926: PetscSFCheckGraphSet(sf,1);
927: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
928: PetscTableCreate(10,size,&table);
929: for (i=0; i<sf->nleaves; i++) {
930: /* Log 1-based rank */
931: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
932: }
933: PetscTableGetCount(table,&sf->nranks);
934: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
935: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
936: PetscTableGetHeadPosition(table,&pos);
937: for (i=0; i<sf->nranks; i++) {
938: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
939: ranks[i]--; /* Convert back to 0-based */
940: }
941: PetscTableDestroy(&table);
943: /* We expect that dgroup is reliably "small" while nranks could be large */
944: {
945: MPI_Group group = MPI_GROUP_NULL;
946: PetscMPIInt *dgroupranks;
947: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
948: MPI_Group_size(dgroup,&groupsize);
949: PetscMalloc1(groupsize,&dgroupranks);
950: PetscMalloc1(groupsize,&groupranks);
951: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
952: if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
953: MPI_Group_free(&group);
954: PetscFree(dgroupranks);
955: }
957: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
958: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
959: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
960: if (InList(ranks[i],groupsize,groupranks)) break;
961: }
962: for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
963: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
964: }
965: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
966: PetscInt tmprank,tmpcount;
968: tmprank = ranks[i];
969: tmpcount = rcount[i];
970: ranks[i] = ranks[sf->ndranks];
971: rcount[i] = rcount[sf->ndranks];
972: ranks[sf->ndranks] = tmprank;
973: rcount[sf->ndranks] = tmpcount;
974: sf->ndranks++;
975: }
976: }
977: PetscFree(groupranks);
978: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
979: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
980: sf->roffset[0] = 0;
981: for (i=0; i<sf->nranks; i++) {
982: PetscMPIIntCast(ranks[i],sf->ranks+i);
983: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
984: rcount[i] = 0;
985: }
986: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
987: /* short circuit */
988: if (orank != sf->remote[i].rank) {
989: /* Search for index of iremote[i].rank in sf->ranks */
990: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
991: if (irank < 0) {
992: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
993: if (irank >= 0) irank += sf->ndranks;
994: }
995: orank = sf->remote[i].rank;
996: }
997: if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
998: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
999: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1000: rcount[irank]++;
1001: }
1002: PetscFree2(rcount,ranks);
1003: return(0);
1004: }
1006: /*@C
1007: PetscSFGetGroups - gets incoming and outgoing process groups
1009: Collective
1011: Input Argument:
1012: . sf - star forest
1014: Output Arguments:
1015: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1016: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1018: Level: developer
1020: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1021: @*/
1022: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1023: {
1025: MPI_Group group = MPI_GROUP_NULL;
1028: if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1029: if (sf->ingroup == MPI_GROUP_NULL) {
1030: PetscInt i;
1031: const PetscInt *indegree;
1032: PetscMPIInt rank,*outranks,*inranks;
1033: PetscSFNode *remote;
1034: PetscSF bgcount;
1036: /* Compute the number of incoming ranks */
1037: PetscMalloc1(sf->nranks,&remote);
1038: for (i=0; i<sf->nranks; i++) {
1039: remote[i].rank = sf->ranks[i];
1040: remote[i].index = 0;
1041: }
1042: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1043: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1044: PetscSFComputeDegreeBegin(bgcount,&indegree);
1045: PetscSFComputeDegreeEnd(bgcount,&indegree);
1046: /* Enumerate the incoming ranks */
1047: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1048: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1049: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1050: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1051: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1052: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1053: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1054: MPI_Group_free(&group);
1055: PetscFree2(inranks,outranks);
1056: PetscSFDestroy(&bgcount);
1057: }
1058: *incoming = sf->ingroup;
1060: if (sf->outgroup == MPI_GROUP_NULL) {
1061: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1062: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1063: MPI_Group_free(&group);
1064: }
1065: *outgoing = sf->outgroup;
1066: return(0);
1067: }
1069: /*@
1070: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1072: Collective
1074: Input Argument:
1075: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1077: Output Arguments:
1078: . multi - star forest with split roots, such that each root has degree exactly 1
1080: Level: developer
1082: Notes:
1084: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1085: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1086: edge, it is a candidate for future optimization that might involve its removal.
1088: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1089: @*/
1090: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1091: {
1097: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1098: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1099: *multi = sf->multi;
1100: return(0);
1101: }
1102: if (!sf->multi) {
1103: const PetscInt *indegree;
1104: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1105: PetscSFNode *remote;
1106: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1107: PetscSFComputeDegreeBegin(sf,&indegree);
1108: PetscSFComputeDegreeEnd(sf,&indegree);
1109: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1110: inoffset[0] = 0;
1111: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1112: for (i=0; i<maxlocal; i++) outones[i] = 1;
1113: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1114: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1115: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1116: #if defined(PETSC_USE_DEBUG) /* Check that the expected number of increments occurred */
1117: for (i=0; i<sf->nroots; i++) {
1118: if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1119: }
1120: #endif
1121: PetscMalloc1(sf->nleaves,&remote);
1122: for (i=0; i<sf->nleaves; i++) {
1123: remote[i].rank = sf->remote[i].rank;
1124: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1125: }
1126: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1127: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1128: if (sf->rankorder) { /* Sort the ranks */
1129: PetscMPIInt rank;
1130: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1131: PetscSFNode *newremote;
1132: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1133: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1134: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1135: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1136: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1137: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);
1138: /* Sort the incoming ranks at each vertex, build the inverse map */
1139: for (i=0; i<sf->nroots; i++) {
1140: PetscInt j;
1141: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1142: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1143: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1144: }
1145: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);
1146: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);
1147: PetscMalloc1(sf->nleaves,&newremote);
1148: for (i=0; i<sf->nleaves; i++) {
1149: newremote[i].rank = sf->remote[i].rank;
1150: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1151: }
1152: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1153: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1154: }
1155: PetscFree3(inoffset,outones,outoffset);
1156: }
1157: *multi = sf->multi;
1158: return(0);
1159: }
1161: /*@C
1162: PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1164: Collective
1166: Input Arguments:
1167: + sf - original star forest
1168: . nselected - number of selected roots on this process
1169: - selected - indices of the selected roots on this process
1171: Output Arguments:
1172: . esf - new star forest
1174: Level: advanced
1176: Note:
1177: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1178: be done by calling PetscSFGetGraph().
1180: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1181: @*/
1182: PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1183: {
1184: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1185: const PetscInt *ilocal;
1186: signed char *rootdata,*leafdata,*leafmem;
1187: const PetscSFNode *iremote;
1188: PetscSFNode *new_iremote;
1189: MPI_Comm comm;
1190: PetscErrorCode ierr;
1194: PetscSFCheckGraphSet(sf,1);
1198: PetscSFSetUp(sf);
1200: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1201: PetscObjectGetComm((PetscObject)sf,&comm);
1202: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1204: #if defined(PETSC_USE_DEBUG)
1205: /* Error out if selected[] has dups or out of range indices */
1206: {
1207: PetscBool dups;
1209: if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1210: for (i=0; i<nselected; i++)
1211: if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1212: }
1213: #endif
1215: if (sf->ops->CreateEmbeddedSF) {
1216: (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);
1217: } else {
1218: /* A generic version of creating embedded sf */
1219: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1220: maxlocal = maxleaf - minleaf + 1;
1221: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1222: leafdata = leafmem - minleaf;
1223: /* Tag selected roots and bcast to leaves */
1224: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1225: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1226: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);
1228: /* Build esf with leaves that are still connected */
1229: esf_nleaves = 0;
1230: for (i=0; i<nleaves; i++) {
1231: j = ilocal ? ilocal[i] : i;
1232: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1233: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1234: */
1235: esf_nleaves += (leafdata[j] ? 1 : 0);
1236: }
1237: PetscMalloc1(esf_nleaves,&new_ilocal);
1238: PetscMalloc1(esf_nleaves,&new_iremote);
1239: for (i=n=0; i<nleaves; i++) {
1240: j = ilocal ? ilocal[i] : i;
1241: if (leafdata[j]) {
1242: new_ilocal[n] = j;
1243: new_iremote[n].rank = iremote[i].rank;
1244: new_iremote[n].index = iremote[i].index;
1245: ++n;
1246: }
1247: }
1248: PetscSFCreate(comm,esf);
1249: PetscSFSetFromOptions(*esf);
1250: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1251: PetscFree2(rootdata,leafmem);
1252: }
1253: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1254: return(0);
1255: }
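/* Editor's note: illustrative sketch (not in the original file, excluded with #if 0) of extracting
   the sub-forest that keeps only edges ending at the first half of the local roots; the selection
   rule here is arbitrary. */
#if 0
static PetscErrorCode ExampleEmbedFirstHalf(PetscSF sf,PetscSF *esf)
{
  PetscInt i,nroots,nselected,*selected;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  nselected = nroots/2;
  PetscMalloc1(nselected,&selected);
  for (i=0; i<nselected; i++) selected[i] = i;  /* keep roots [0,nroots/2) */
  PetscSFCreateEmbeddedSF(sf,nselected,selected,esf);
  PetscFree(selected);
  return(0);
}
#endif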
1257: /*@C
1258: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1260: Collective
1262: Input Arguments:
1263: + sf - original star forest
1264: . nselected - number of selected leaves on this process
1265: - selected - indices of the selected leaves on this process
1267: Output Arguments:
1268: . newsf - new star forest
1270: Level: advanced
1272: .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1273: @*/
1274: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1275: {
1276: const PetscSFNode *iremote;
1277: PetscSFNode *new_iremote;
1278: const PetscInt *ilocal;
1279: PetscInt i,nroots,*leaves,*new_ilocal;
1280: MPI_Comm comm;
1281: PetscErrorCode ierr;
1285: PetscSFCheckGraphSet(sf,1);
1289: /* Remove duplicates from selected[] and put the result in leaves[] */
1290: PetscObjectGetComm((PetscObject)sf,&comm);
1291: PetscMalloc1(nselected,&leaves);
1292: PetscArraycpy(leaves,selected,nselected);
1293: PetscSortedRemoveDupsInt(&nselected,leaves);
1294: if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1296: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1297: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1298: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1299: } else {
1300: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1301: PetscMalloc1(nselected,&new_ilocal);
1302: PetscMalloc1(nselected,&new_iremote);
1303: for (i=0; i<nselected; ++i) {
1304: const PetscInt l = leaves[i];
1305: new_ilocal[i] = ilocal ? ilocal[l] : l;
1306: new_iremote[i].rank = iremote[l].rank;
1307: new_iremote[i].index = iremote[l].index;
1308: }
1309: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1310: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1311: }
1312: PetscFree(leaves);
1313: return(0);
1314: }
1316: /*@C
1317: PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1319: Collective on PetscSF
1321: Input Arguments:
1322: + sf - star forest on which to communicate
1323: . unit - data type associated with each node
1324: . rootdata - buffer to broadcast
1325: - op - operation to use for reduction
1327: Output Arguments:
1328: . leafdata - buffer to be reduced with values from each leaf's respective root
1330: Level: intermediate
1332: .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1333: @*/
1334: PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1335: {
1337: PetscMemType rootmtype,leafmtype;
1341: PetscSFSetUp(sf);
1342: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1343: PetscGetMemType(rootdata,&rootmtype);
1344: PetscGetMemType(leafdata,&leafmtype);
1345: (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1346: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
1347: return(0);
1348: }
1350: /*@C
1351: PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1353: Collective
1355: Input Arguments:
1356: + sf - star forest
1357: . unit - data type
1358: . rootdata - buffer to broadcast
1359: - op - operation to use for reduction
1361: Output Arguments:
1362: . leafdata - buffer to be reduced with values from each leaf's respective root
1364: Level: intermediate
1366: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1367: @*/
1368: PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1369: {
1374: PetscSFSetUp(sf);
1375: PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1376: (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);
1377: PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);
1378: return(0);
1379: }
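/* Editor's note: illustrative sketch (not part of the original source, excluded with #if 0) of the
   split-phase BcastAndOp pair documented above: each root value is added (MPI_SUM) into the existing
   values at its leaves. rootdata has nroots entries; leafdata must cover the leaf range. */
#if 0
static PetscErrorCode ExampleBcastAndAdd(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
  /* Independent local work can be overlapped with the communication here */
  PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);
  return(0);
}
#endif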
1381: /*@C
1382: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1384: Collective
1386: Input Arguments:
1387: + sf - star forest
1388: . unit - data type
1389: . leafdata - values to reduce
1390: - op - reduction operation
1392: Output Arguments:
1393: . rootdata - result of reduction of values from all leaves of each root
1395: Level: intermediate
1397: .seealso: PetscSFBcastBegin()
1398: @*/
1399: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1400: {
1402: PetscMemType rootmtype,leafmtype;
1406: PetscSFSetUp(sf);
1407: PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1408: PetscGetMemType(rootdata,&rootmtype);
1409: PetscGetMemType(leafdata,&leafmtype);
1410: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1411: PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1412: return(0);
1413: }
1415: /*@C
1416: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1418: Collective
1420: Input Arguments:
1421: + sf - star forest
1422: . unit - data type
1423: . leafdata - values to reduce
1424: - op - reduction operation
1426: Output Arguments:
1427: . rootdata - result of reduction of values from all leaves of each root
1429: Level: intermediate
1431: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1432: @*/
1433: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1434: {
1439: PetscSFSetUp(sf);
1440: PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1441: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1442: PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1443: return(0);
1444: }
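/* Editor's note: illustrative sketch (not in the original file, excluded with #if 0) of the common
   owner/ghost update cycle built from the Bcast and Reduce pairs: owners broadcast current values to
   ghost copies, then ghost contributions are accumulated back onto the owners with MPI_SUM. The
   array names are hypothetical. */
#if 0
static PetscErrorCode ExampleGhostUpdate(PetscSF sf,PetscInt *owned,PetscInt *ghosted)
{
  /* owner (root) -> ghost (leaf): overwrite each ghost copy with the owner's value */
  PetscSFBcastBegin(sf,MPIU_INT,owned,ghosted);
  PetscSFBcastEnd(sf,MPIU_INT,owned,ghosted);
  /* ghost (leaf) -> owner (root): add the ghost contributions into the owner's value */
  PetscSFReduceBegin(sf,MPIU_INT,ghosted,owned,MPI_SUM);
  PetscSFReduceEnd(sf,MPIU_INT,ghosted,owned,MPI_SUM);
  return(0);
}
#endif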
1446: /*@C
1447: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1449: Collective
1451: Input Arguments:
1452: + sf - star forest
1453: . unit - data type
1454: . leafdata - leaf values to use in reduction
1455: - op - operation to use for reduction
1457: Output Arguments:
1458: + rootdata - root values to be updated, input state is seen by first process to perform an update
1459: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1461: Level: advanced
1463: Note:
1464: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1465: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1466: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1467: integers.
1469: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1470: @*/
1471: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1472: {
1474: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1478: PetscSFSetUp(sf);
1479: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1480: PetscGetMemType(rootdata,&rootmtype);
1481: PetscGetMemType(leafdata,&leafmtype);
1482: PetscGetMemType(leafupdate,&leafupdatemtype);
1483: if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1484: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1485: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1486: return(0);
1487: }
1489: /*@C
1490: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1492: Collective
1494: Input Arguments:
1495: + sf - star forest
1496: . unit - data type
1497: . leafdata - leaf values to use in reduction
1498: - op - operation to use for reduction
1500: Output Arguments:
1501: + rootdata - root values to be updated, input state is seen by first process to perform an update
1502: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1504: Level: advanced
1506: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1507: @*/
1508: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1509: {
1514: PetscSFSetUp(sf);
1515: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1516: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1517: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1518: return(0);
1519: }
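/* Editor's note: illustrative sketch (not part of the original source, excluded with #if 0) of the
   FetchAndOp pair, mirroring its use in PetscSFGetMultiSF() above: every leaf atomically adds 1 to a
   counter at its root and receives the counter's previous value, i.e. a unique per-root slot. */
#if 0
static PetscErrorCode ExampleFetchAndAdd(PetscSF sf,PetscInt *rootcounter,const PetscInt *leafones,PetscInt *leafslots)
{
  /* leafones[] is expected to contain all 1s; leafslots[] receives the pre-update counter values */
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcounter,leafones,leafslots,MPI_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcounter,leafones,leafslots,MPI_SUM);
  return(0);
}
#endif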
1521: /*@C
1522: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1524: Collective
1526: Input Arguments:
1527: . sf - star forest
1529: Output Arguments:
1530: . degree - degree of each root vertex
1532: Level: advanced
1534: Notes:
1535: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1537: .seealso: PetscSFGatherBegin()
1538: @*/
1539: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1540: {
1545: PetscSFCheckGraphSet(sf,1);
1547: if (!sf->degreeknown) {
1548: PetscInt i, nroots = sf->nroots, maxlocal;
1549: if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1550: maxlocal = sf->maxleaf-sf->minleaf+1;
1551: PetscMalloc1(nroots,&sf->degree);
1552: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1553: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1554: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1555: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1556: }
1557: *degree = NULL;
1558: return(0);
1559: }
1561: /*@C
1562: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1564: Collective
1566: Input Arguments:
1567: . sf - star forest
1569: Output Arguments:
1570: . degree - degree of each root vertex
1572: Level: developer
1574: Notes:
1575: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1577: .seealso: PetscSFComputeDegreeBegin()
1578: @*/
1579: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1580: {
1585: PetscSFCheckGraphSet(sf,1);
1587: if (!sf->degreeknown) {
1588: if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1589: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1590: PetscFree(sf->degreetmp);
1591: sf->degreeknown = PETSC_TRUE;
1592: }
1593: *degree = sf->degree;
1594: return(0);
1595: }
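/* Editor's note: illustrative sketch (not in the original file, excluded with #if 0) of the
   ComputeDegree pair: it reports, for each locally owned root, how many leaves in the whole forest
   reference it. The degree array is owned by the SF and must not be freed by the caller. */
#if 0
static PetscErrorCode ExamplePrintRootDegrees(PetscSF sf)
{
  PetscInt       i,nroots;
  const PetscInt *degree;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0; i<nroots; i++) {
    PetscPrintf(PETSC_COMM_SELF,"root %D has degree %D\n",i,degree[i]);
  }
  return(0);
}
#endif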
1598: /*@C
1599: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1600: Each multi-root is assigned the index of the corresponding original root.
1602: Collective
1604: Input Arguments:
1605: + sf - star forest
1606: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1608: Output Arguments:
1609: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1610: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1612: Level: developer
1614: Notes:
1615: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1617: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1618: @*/
1619: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1620: {
1621: PetscSF msf;
1622: PetscInt i, j, k, nroots, nmroots;
1623: PetscErrorCode ierr;
1627: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1631: PetscSFGetMultiSF(sf,&msf);
1632: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1633: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1634: for (i=0,j=0,k=0; i<nroots; i++) {
1635: if (!degree[i]) continue;
1636: for (j=0; j<degree[i]; j++,k++) {
1637: (*multiRootsOrigNumbering)[k] = i;
1638: }
1639: }
1640: #if defined(PETSC_USE_DEBUG)
1641: if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1642: #endif
1643: if (nMultiRoots) *nMultiRoots = nmroots;
1644: return(0);
1645: }
1647: /*@C
1648: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1650: Collective
1652: Input Arguments:
1653: + sf - star forest
1654: . unit - data type
1655: - leafdata - leaf data to gather to roots
1657: Output Argument:
1658: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1660: Level: intermediate
1662: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1663: @*/
1664: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1665: {
1667: PetscSF multi = NULL;
1671: PetscSFSetUp(sf);
1672: PetscSFGetMultiSF(sf,&multi);
1673: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1674: return(0);
1675: }
1677: /*@C
1678: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1680: Collective
1682: Input Arguments:
1683: + sf - star forest
1684: . unit - data type
1685: - leafdata - leaf data to gather to roots
1687: Output Argument:
1688: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1690: Level: intermediate
1692: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1693: @*/
1694: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1695: {
1697: PetscSF multi = NULL;
1701: PetscSFSetUp(sf);
1702: PetscSFGetMultiSF(sf,&multi);
1703: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);
1704: return(0);
1705: }
1707: /*@C
1708: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1710: Collective
1712: Input Arguments:
1713: + sf - star forest
1714: . unit - data type
1715: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1717: Output Argument:
1718: . leafdata - leaf data to be updated with personal data from each respective root
1720: Level: intermediate
1722: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1723: @*/
1724: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1725: {
1727: PetscSF multi = NULL;
1731: PetscSFSetUp(sf);
1732: PetscSFGetMultiSF(sf,&multi);
1733: PetscSFBcastBegin(multi,unit,multirootdata,leafdata);
1734: return(0);
1735: }
1737: /*@C
1738: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1740: Collective
1742: Input Arguments:
1743: + sf - star forest
1744: . unit - data type
1745: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1747: Output Argument:
1748: . leafdata - leaf data to be updated with personal data from each respective root
1750: Level: intermediate
1752: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1753: @*/
1754: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1755: {
1757: PetscSF multi = NULL;
1761: PetscSFSetUp(sf);
1762: PetscSFGetMultiSF(sf,&multi);
1763: PetscSFBcastEnd(multi,unit,multirootdata,leafdata);
1764: return(0);
1765: }
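/* The matching scatter sketch (hypothetical helper; multirootdata is assumed to be laid out as in
   the gather sketch above, one unit per connected leaf): push each root's personal entries back
   to the corresponding leaves. */
static PetscErrorCode SketchScatter_Example(PetscSF sf,const PetscInt *multirootdata,PetscInt *leafdata)
{
  PetscErrorCode ierr;

  PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);
  /* each leaf now holds the entry of its root's block that corresponds to that leaf */
  return(0);
}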
1767: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1768: {
1769: #if defined(PETSC_USE_DEBUG)
1770: PetscInt i, n, nleaves;
1771: const PetscInt *ilocal = NULL;
1772: PetscHSetI seen;
1773: PetscErrorCode ierr;
1776: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1777: PetscHSetICreate(&seen);
1778: for (i = 0; i < nleaves; i++) {
1779: const PetscInt leaf = ilocal ? ilocal[i] : i;
1780: PetscHSetIAdd(seen,leaf);
1781: }
1782: PetscHSetIGetSize(seen,&n);
1783: if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1784: PetscHSetIDestroy(&seen);
1785: return(0);
1786: #else
1788: return(0);
1789: #endif
1790: }
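/* Illustration (hypothetical helper, serial): an SF whose ilocal lists the same leaf twice is not a
   true star forest, so in debug builds the check above rejects it when such an SF is passed to
   PetscSFCompose()/PetscSFComposeInverse(). */
static PetscErrorCode SketchDuplicateLeaf_Example(void)
{
  PetscSF        sf;
  PetscInt       ilocal[2]  = {0,0};            /* leaf 0 listed twice: two roots feed one leaf */
  PetscSFNode    iremote[2] = {{0,0},{0,1}};    /* both point at rank 0, roots 0 and 1 */
  PetscErrorCode ierr;

  PetscSFCreate(PETSC_COMM_SELF,&sf);
  PetscSFSetGraph(sf,2,2,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
  /* passing sf to PetscSFCompose() here would trigger PETSC_ERR_ARG_OUTOFRANGE in debug builds */
  PetscSFDestroy(&sf);
  return(0);
}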
1792: /*@
1793: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1795: Input Parameters:
1796: + sfA - The first PetscSF
1797: - sfB - The second PetscSF
1799: Output Parameters:
1800: . sfBA - The composite SF
1802: Level: developer
1804: Notes:
1805: Currently, the two SFs must be defined on congruent communicators and they must be true star
1806: forests, i.e., no leaf is connected with more than one root.
1808: sfA's leaf space and sfB's root space may partially overlap. The composition builds
1809: a graph with sfA's roots and sfB's leaves, keeping only those between which there is a path. Unconnected
1810: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1811: Bcast on sfA followed by a Bcast on sfB, restricted to the connected nodes.
1813: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1814: @*/
1815: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1816: {
1817: PetscErrorCode ierr;
1818: const PetscSFNode *remotePointsA,*remotePointsB;
1819: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1820: const PetscInt *localPointsA,*localPointsB;
1821: PetscInt *localPointsBA;
1822: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1823: PetscBool denseB;
1827: PetscSFCheckGraphSet(sfA,1);
1829: PetscSFCheckGraphSet(sfB,2);
1832: PetscSFCheckLeavesUnique_Private(sfA);
1833: PetscSFCheckLeavesUnique_Private(sfB);
1835: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1836: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1837: if (localPointsA) {
1838: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1839: for (i=0; i<numRootsB; i++) {
1840: reorderedRemotePointsA[i].rank = -1;
1841: reorderedRemotePointsA[i].index = -1;
1842: }
1843: for (i=0; i<numLeavesA; i++) {
1844: if (localPointsA[i] >= numRootsB) continue;
1845: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1846: }
1847: remotePointsA = reorderedRemotePointsA;
1848: }
1849: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1850: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1851: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1852: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);
1853: PetscFree(reorderedRemotePointsA);
1855: denseB = (PetscBool)!localPointsB;
1856: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1857: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1858: else numLeavesBA++;
1859: }
1860: if (denseB) {
1861: localPointsBA = NULL;
1862: remotePointsBA = leafdataB;
1863: } else {
1864: PetscMalloc1(numLeavesBA,&localPointsBA);
1865: PetscMalloc1(numLeavesBA,&remotePointsBA);
1866: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1867: const PetscInt l = localPointsB ? localPointsB[i] : i;
1869: if (leafdataB[l-minleaf].rank == -1) continue;
1870: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1871: localPointsBA[numLeavesBA] = l;
1872: numLeavesBA++;
1873: }
1874: PetscFree(leafdataB);
1875: }
1876: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1877: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1878: return(0);
1879: }
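/* A minimal usage sketch (hypothetical helper and data; assumes sfA's leaf space serves as sfB's
   root space): the composite sfBA carries root data of sfA directly to the leaves of sfB. */
static PetscErrorCode SketchCompose_Example(PetscSF sfA,PetscSF sfB,const PetscInt *rootdataA,PetscInt *leafdataB)
{
  PetscSF        sfBA;
  PetscErrorCode ierr;

  PetscSFCompose(sfA,sfB,&sfBA);
  /* equivalent to a Bcast on sfA followed by a Bcast on sfB, restricted to connected nodes */
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB);
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}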
1881: /*@
1882: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1884: Input Parameters:
1885: + sfA - The first PetscSF
1886: - sfB - The second PetscSF
1888: Output Parameters:
1889: . sfBA - The composite SF.
1891: Level: developer
1893: Notes:
1894: Currently, the two SFs must be defined on congruent communicators and they must be true star
1895: forests, i.e., no leaf is connected with more than one root. Moreover, all roots of the
1896: second SF must have degree 1, i.e., no root has more than one connected leaf.
1898: sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1899: a graph with sfA's roots and sfB's roots, keeping only those between which there is a path. Unconnected
1900: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed
1901: by a Reduce on sfB, restricted to the connected roots.
1903: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1904: @*/
1905: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1906: {
1907: PetscErrorCode ierr;
1908: const PetscSFNode *remotePointsA,*remotePointsB;
1909: PetscSFNode *remotePointsBA;
1910: const PetscInt *localPointsA,*localPointsB;
1911: PetscSFNode *reorderedRemotePointsA = NULL;
1912: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1913: MPI_Op op;
1914: #if defined(PETSC_USE_64BIT_INDICES)
1915: PetscBool iswin;
1916: #endif
1920: PetscSFCheckGraphSet(sfA,1);
1922: PetscSFCheckGraphSet(sfB,2);
1925: PetscSFCheckLeavesUnique_Private(sfA);
1926: PetscSFCheckLeavesUnique_Private(sfB);
1928: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1929: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1931: /* TODO: Check roots of sfB have degree of 1 */
1932: /* Once that check is implemented, the MPI_MAXLOC below can be replaced with MPIU_REPLACE,
1933: since the two have the same effect when every root of sfB has degree 1.
1934: We use MPI_MAXLOC only to get a deterministic output from this routine if
1935: the root condition is not met.
1936: */
1937: op = MPI_MAXLOC;
1938: #if defined(PETSC_USE_64BIT_INDICES)
1939: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
1940: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
1941: if (iswin) op = MPIU_REPLACE;
1942: #endif
1944: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1945: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
1946: for (i=0; i<maxleaf - minleaf + 1; i++) {
1947: reorderedRemotePointsA[i].rank = -1;
1948: reorderedRemotePointsA[i].index = -1;
1949: }
1950: if (localPointsA) {
1951: for (i=0; i<numLeavesA; i++) {
1952: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
1953: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
1954: }
1955: } else {
1956: for (i=0; i<numLeavesA; i++) {
1957: if (i > maxleaf || i < minleaf) continue;
1958: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
1959: }
1960: }
1962: PetscMalloc1(numRootsB,&localPointsBA);
1963: PetscMalloc1(numRootsB,&remotePointsBA);
1964: for (i=0; i<numRootsB; i++) {
1965: remotePointsBA[i].rank = -1;
1966: remotePointsBA[i].index = -1;
1967: }
1969: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1970: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
1971: PetscFree(reorderedRemotePointsA);
1972: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
1973: if (remotePointsBA[i].rank == -1) continue;
1974: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
1975: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
1976: localPointsBA[numLeavesBA] = i;
1977: numLeavesBA++;
1978: }
1979: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1980: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1981: return(0);
1982: }
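/* A minimal usage sketch (hypothetical helper and data; assumes sfA and sfB share the same leaf
   space and every root of sfB has degree 1): sfBA carries root data of sfA to the roots of sfB. */
static PetscErrorCode SketchComposeInverse_Example(PetscSF sfA,PetscSF sfB,const PetscInt *rootdataA,PetscInt *rootdataB)
{
  PetscSF        sfBA;
  PetscErrorCode ierr;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  /* equivalent to a Bcast on sfA followed by a Reduce on sfB, restricted to connected roots */
  PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,rootdataB);
  PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,rootdataB);
  PetscSFDestroy(&sfBA);
  return(0);
}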
1984: /*
1985: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1987: Input Parameters:
1988: . sf - The global PetscSF
1990: Output Parameters:
1991: . out - The local PetscSF
1992: */
1993: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1994: {
1995: MPI_Comm comm;
1996: PetscMPIInt myrank;
1997: const PetscInt *ilocal;
1998: const PetscSFNode *iremote;
1999: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2000: PetscSFNode *liremote;
2001: PetscSF lsf;
2002: PetscErrorCode ierr;
2006: if (sf->ops->CreateLocalSF) {
2007: (*sf->ops->CreateLocalSF)(sf,out);
2008: } else {
2009: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2010: PetscObjectGetComm((PetscObject)sf,&comm);
2011: MPI_Comm_rank(comm,&myrank);
2013: /* Find out local edges and build a local SF */
2014: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2015: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2016: PetscMalloc1(lnleaves,&lilocal);
2017: PetscMalloc1(lnleaves,&liremote);
2019: for (i=j=0; i<nleaves; i++) {
2020: if (iremote[i].rank == (PetscInt)myrank) {
2021: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2022: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2023: liremote[j].index = iremote[i].index;
2024: j++;
2025: }
2026: }
2027: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2028: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2029: PetscSFSetUp(lsf);
2030: *out = lsf;
2031: }
2032: return(0);
2033: }
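/* A minimal usage sketch (hypothetical helper and data): extract the on-process part of sf and use
   it for purely local communication, i.e. copying root values only to leaves owned by this rank. */
static PetscErrorCode SketchLocalSF_Example(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscSF        lsf;
  PetscErrorCode ierr;

  PetscSFCreateLocalSF_Private(sf,&lsf);
  PetscSFBcastBegin(lsf,MPIU_INT,rootdata,leafdata); /* traverses only intra-process edges */
  PetscSFBcastEnd(lsf,MPIU_INT,rootdata,leafdata);
  PetscSFDestroy(&lsf);
  return(0);
}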
2035: /* Similar to PetscSFBcast, but broadcasts only to the leaves residing on rank 0 */
2036: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2037: {
2038: PetscErrorCode ierr;
2039: PetscMemType rootmtype,leafmtype;
2043: PetscSFSetUp(sf);
2044: PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2045: PetscGetMemType(rootdata,&rootmtype);
2046: PetscGetMemType(leafdata,&leafmtype);
2047: if (sf->ops->BcastToZero) {
2048: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2049: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2050: PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);
2051: return(0);
2052: }
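/* A minimal usage sketch (hypothetical helper and data): same calling pattern as a regular Bcast,
   but only the leaves located on rank 0 of sf's communicator receive root data; leaf buffers on
   other ranks are left untouched. */
static PetscErrorCode SketchBcastToZero_Example(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscErrorCode ierr;

  PetscSFBcastToZero_Private(sf,MPIU_INT,rootdata,leafdata);
  return(0);
}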